diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..192e95f84aeb74caf1bfb6bd38ebdeee1e6ed5ac 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +docs/insight.jpg filter=lfs diff=lfs merge=lfs -text +docs/insight.pdf filter=lfs diff=lfs merge=lfs -text +docs/style.jpg filter=lfs diff=lfs merge=lfs -text +docs/texture.jpg filter=lfs diff=lfs merge=lfs -text +docs/vis.jpg filter=lfs diff=lfs merge=lfs -text +docs/walking.jpg filter=lfs diff=lfs merge=lfs -text diff --git a/Dassl.pytorch/.DS_Store b/Dassl.pytorch/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..6769932d5e60543bc290461c6791d727ff478bc2 Binary files /dev/null and b/Dassl.pytorch/.DS_Store differ diff --git a/Dassl.pytorch/.flake8 b/Dassl.pytorch/.flake8 new file mode 100644 index 0000000000000000000000000000000000000000..ac13c77e7cae2d8cc90e09302ec14fbdd3167c54 --- /dev/null +++ b/Dassl.pytorch/.flake8 @@ -0,0 +1,24 @@ +[flake8] +ignore = + # At least two spaces before inline comment + E261, + # Line lengths are recommended to be no greater than 79 characters + E501, + # Missing whitespace around arithmetic operator + E226, + # Blank line contains whitespace + W293, + # Do not use bare 'except' + E722, + # Line break after binary operator + W504, + # Too many leading '#' for block comment + E266, + # Line break before binary operator + W503, + # Continuation line over-indented for hanging indent + E126, + # Module level import not at top of file + E402 +max-line-length = 79 +exclude = __init__.py, build \ No newline at end of file diff --git a/Dassl.pytorch/.gitignore b/Dassl.pytorch/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..e8bcb640adffb377612133a42db91290194a5a3b --- /dev/null +++ b/Dassl.pytorch/.gitignore @@ -0,0 +1,139 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# OS X +.DS_Store +.Spotlight-V100 +.Trashes +._* + +# This project +output/ +debug/ diff --git a/Dassl.pytorch/.isort.cfg b/Dassl.pytorch/.isort.cfg new file mode 100644 index 0000000000000000000000000000000000000000..6b019a3d30cc959266834870194373c8cbedd184 --- /dev/null +++ b/Dassl.pytorch/.isort.cfg @@ -0,0 +1,10 @@ +[isort] +line_length=79 +multi_line_output=6 +length_sort=true +known_standard_library=numpy,setuptools +known_myself=dassl +known_third_party=matplotlib,cv2,torch,torchvision,PIL,yacs,scipy,gdown +no_lines_before=STDLIB,THIRDPARTY +sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER +default_section=FIRSTPARTY \ No newline at end of file diff --git a/Dassl.pytorch/.style.yapf b/Dassl.pytorch/.style.yapf new file mode 100644 index 0000000000000000000000000000000000000000..5d8b5f5c4ff185aba83ea51b1e58e787f5cb6cd1 --- /dev/null +++ b/Dassl.pytorch/.style.yapf @@ -0,0 +1,7 @@ +[style] +BASED_ON_STYLE = pep8 +BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true +SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true +DEDENT_CLOSING_BRACKETS = true +SPACES_BEFORE_COMMENT = 2 +ARITHMETIC_PRECEDENCE_INDICATION = true \ No newline at end of file diff --git a/Dassl.pytorch/DATASETS.md b/Dassl.pytorch/DATASETS.md new file mode 100644 index 0000000000000000000000000000000000000000..27ad5099b5b50171d9502352fde9f4b1d2237240 --- /dev/null +++ b/Dassl.pytorch/DATASETS.md @@ -0,0 +1,318 @@ +# How to Install Datasets + +`$DATA` denotes the location where datasets are installed, e.g. + +``` +$DATA/ +|–– office31/ +|–– office_home/ +|–– visda17/ +``` + +[Domain Adaptation](#domain-adaptation) +- [Office-31](#office-31) +- [Office-Home](#office-home) +- [VisDA17](#visda17) +- [CIFAR10-STL10](#cifar10-stl10) +- [Digit-5](#digit-5) +- [DomainNet](#domainnet) +- [miniDomainNet](#miniDomainNet) + +[Domain Generalization](#domain-generalization) +- [PACS](#pacs) +- [VLCS](#vlcs) +- [Office-Home-DG](#office-home-dg) +- [Digits-DG](#digits-dg) +- [Digit-Single](#digit-single) +- [CIFAR-10-C](#cifar-10-c) +- [CIFAR-100-C](#cifar-100-c) +- [WILDS](#wilds) + +[Semi-Supervised Learning](#semi-supervised-learning) +- [CIFAR10/100 and SVHN](#cifar10100-and-svhn) +- [STL10](#stl10) + +## Domain Adaptation + +### Office-31 + +Download link: https://people.eecs.berkeley.edu/~jhoffman/domainadapt/#datasets_code. + +File structure: + +``` +office31/ +|–– amazon/ +| |–– back_pack/ +| |–– bike/ +| |–– ... +|–– dslr/ +| |–– back_pack/ +| |–– bike/ +| |–– ... +|–– webcam/ +| |–– back_pack/ +| |–– bike/ +| |–– ... +``` + +Note that within each domain folder you need to move all class folders out of the `images/` folder and then delete the `images/` folder. + +### Office-Home + +Download link: http://hemanthdv.org/OfficeHome-Dataset/. + +File structure: + +``` +office_home/ +|–– art/ +|–– clipart/ +|–– product/ +|–– real_world/ +``` + +### VisDA17 + +Download link: http://ai.bu.edu/visda-2017/. + +The dataset can also be downloaded using our script at `datasets/da/visda17.sh`. 
Run the following command in your terminal under `Dassl.pytorch/datasets/da`, + +```bash +sh visda17.sh $DATA +``` + +Once the download is finished, the file structure will look like + +``` +visda17/ +|–– train/ +|–– test/ +|–– validation/ +``` + +### CIFAR10-STL10 + +Run the following command in your terminal under `Dassl.pytorch/datasets/da`, + +```bash +python cifar_stl.py $DATA/cifar_stl +``` + +This will create a folder named `cifar_stl` under `$DATA`. The file structure will look like + +``` +cifar_stl/ +|–– cifar/ +| |–– train/ +| |–– test/ +|–– stl/ +| |–– train/ +| |–– test/ +``` + +Note that only 9 classes shared by both datasets are kept. + +### Digit-5 + +Create a folder `$DATA/digit5` and download to this folder the dataset from [here](https://github.com/VisionLearningGroup/VisionLearningGroup.github.io/tree/master/M3SDA/code_MSDA_digit#digit-five-download). This should give you + +``` +digit5/ +|–– Digit-Five/ +``` + +Then, run the following command in your terminal under `Dassl.pytorch/datasets/da`, + +```bash +python digit5.py $DATA/digit5 +``` + +This will extract the data and organize the file structure as + +``` +digit5/ +|–– Digit-Five/ +|–– mnist/ +|–– mnist_m/ +|–– usps/ +|–– svhn/ +|–– syn/ +``` + +### DomainNet + +Download link: http://ai.bu.edu/M3SDA/. (Please download the cleaned version of split files) + +File structure: + +``` +domainnet/ +|–– clipart/ +|–– infograph/ +|–– painting/ +|–– quickdraw/ +|–– real/ +|–– sketch/ +|–– splits/ +| |–– clipart_train.txt +| |–– clipart_test.txt +| |–– ... +``` + +### miniDomainNet + +You need to download the DomainNet dataset first. The miniDomainNet's split files can be downloaded at this [google drive](https://drive.google.com/open?id=15rrLDCrzyi6ZY-1vJar3u7plgLe4COL7). After the zip file is extracted, you should have the folder `$DATA/domainnet/splits_mini/`. + +## Domain Generalization + +### PACS + +Download link: [google drive](https://drive.google.com/open?id=1m4X4fROCCXMO0lRLrr6Zz9Vb3974NWhE). + +File structure: + +``` +pacs/ +|–– images/ +|–– splits/ +``` + +You do not necessarily have to manually download this dataset. Once you run ``tools/train.py``, the code will detect if the dataset exists or not and automatically download the dataset to ``$DATA`` if missing. This also applies to VLCS, Office-Home-DG, and Digits-DG. + +### VLCS + +Download link: [google drive](https://drive.google.com/file/d/1r0WL5DDqKfSPp9E3tRENwHaXNs1olLZd/view?usp=sharing) (credit to https://github.com/fmcarlucci/JigenDG#vlcs) + +File structure: + +``` +VLCS/ +|–– CALTECH/ +|–– LABELME/ +|–– PASCAL/ +|–– SUN/ +``` + +### Office-Home-DG + +Download link: [google drive](https://drive.google.com/open?id=1gkbf_KaxoBws-GWT3XIPZ7BnkqbAxIFa). + +File structure: + +``` +office_home_dg/ +|–– art/ +|–– clipart/ +|–– product/ +|–– real_world/ +``` + +### Digits-DG + +Download link: [google driv](https://drive.google.com/open?id=15V7EsHfCcfbKgsDmzQKj_DfXt_XYp_P7). + +File structure: + +``` +digits_dg/ +|–– mnist/ +|–– mnist_m/ +|–– svhn/ +|–– syn/ +``` + +### Digit-Single +Follow the steps for [Digit-5](#digit-5) to organize the dataset. + +### CIFAR-10-C + +First download the CIFAR-10-C dataset from https://zenodo.org/record/2535967#.YFxHEWQzb0o to, e.g., $DATA, and extract the file under the same directory. Then, navigate to `Dassl.pytorch/datasets/dg` and run the following command in your terminal +```bash +python cifar_c.py $DATA/CIFAR-10-C +``` +where the first argument denotes the path to the (uncompressed) CIFAR-10-C dataset. 
+ +The script will extract images from the `.npy` files and save them to `cifar10_c/` created under $DATA. The file structure will look like +``` +cifar10_c/ +|–– brightness/ +| |–– 1/ # 5 intensity levels in total +| |–– 2/ +| |–– 3/ +| |–– 4/ +| |–– 5/ +|–– ... # 19 corruption types in total +``` + +Note that `cifar10_c/` only contains the test images. The training images are the normal CIFAR-10 images. See [CIFAR10/100 and SVHN](#cifar10100-and-svhn) for how to prepare the CIFAR-10 dataset. + +### CIFAR-100-C + +First download the CIFAR-100-C dataset from https://zenodo.org/record/3555552#.YFxpQmQzb0o to, e.g., $DATA, and extract the file under the same directory. Then, navigate to `Dassl.pytorch/datasets/dg` and run the following command in your terminal +```bash +python cifar_c.py $DATA/CIFAR-100-C +``` +where the first argument denotes the path to the (uncompressed) CIFAR-100-C dataset. + +The script will extract images from the `.npy` files and save them to `cifar100_c/` created under $DATA. The file structure will look like +``` +cifar100_c/ +|–– brightness/ +| |–– 1/ # 5 intensity levels in total +| |–– 2/ +| |–– 3/ +| |–– 4/ +| |–– 5/ +|–– ... # 19 corruption types in total +``` + +Note that `cifar100_c/` only contains the test images. The training images are the normal CIFAR-100 images. See [CIFAR10/100 and SVHN](#cifar10100-and-svhn) for how to prepare the CIFAR-100 dataset. + +### WILDS + +No action is required to preprocess WILDS's datasets. The code will automatically download the data. + +## Semi-Supervised Learning + +### CIFAR10/100 and SVHN + +Run the following command in your terminal under `Dassl.pytorch/datasets/ssl`, + +```bash +python cifar10_cifar100_svhn.py $DATA +``` + +This will create three folders under `$DATA`, i.e. + +``` +cifar10/ +|–– train/ +|–– test/ +cifar100/ +|–– train/ +|–– test/ +svhn/ +|–– train/ +|–– test/ +``` + +### STL10 + +Run the following command in your terminal under `Dassl.pytorch/datasets/ssl`, + +```bash +python stl10.py $DATA/stl10 +``` + +This will create a folder named `stl10` under `$DATA` and extract the data into three folders, i.e. `train`, `test` and `unlabeled`. Then, download from http://ai.stanford.edu/~acoates/stl10/ the "Binary files" and extract it under `stl10`. + +The file structure will look like + +``` +stl10/ +|–– train/ +|–– test/ +|–– unlabeled/ +|–– stl10_binary/ +``` \ No newline at end of file diff --git a/Dassl.pytorch/LICENSE b/Dassl.pytorch/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..691961454eebc1ee8f9554cd47322c55b117f725 --- /dev/null +++ b/Dassl.pytorch/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Kaiyang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Dassl.pytorch/README.md b/Dassl.pytorch/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6f3ec6f3f157e8fe659ac71f91add34aa1318544 --- /dev/null +++ b/Dassl.pytorch/README.md @@ -0,0 +1,279 @@ +# Dassl + +## Introduction + +Dassl is a [PyTorch](https://pytorch.org) toolbox initially developed for our project [Domain Adaptive Ensemble Learning (DAEL)](https://arxiv.org/abs/2003.07325) to support research in domain adaptation and generalization---since in DAEL we study how to unify these two problems in a single learning framework. Given that domain adaptation is closely related to semi-supervised learning---both study how to exploit unlabeled data---we also incorporate components that support research for the latter. + +Why the name "Dassl"? Dassl combines the initials of domain adaptation (DA) and semi-supervised learning (SSL), which sounds natural and informative. + +Dassl has a modular design and unified interfaces, allowing fast prototyping and experimentation with new DA/DG/SSL methods. With Dassl, a new method can be implemented with only a few lines of code. Don't believe it? Take a look at the [engine](https://github.com/KaiyangZhou/Dassl.pytorch/tree/master/dassl/engine) folder, which contains the implementations of many existing methods (then you will come back and star this repo). :-) + +Basically, Dassl is perfect for doing research in the following areas: +- Domain adaptation +- Domain generalization +- Semi-supervised learning + +BUT, thanks to the neat design, Dassl can also be used as a codebase to develop any deep learning project, like [this](https://github.com/KaiyangZhou/CoOp). :-) + +A drawback of Dassl is that it doesn't (yet? hmm) support distributed multi-GPU training (Dassl uses `DataParallel` to wrap a model, which is less efficient than `DistributedDataParallel`). + +We don't provide detailed documentation for Dassl, unlike another [project](https://kaiyangzhou.github.io/deep-person-reid/) of ours. This is because Dassl is developed for research purposes and, as researchers, we think it's important to be able to read source code, so we highly encourage you to do so---definitely not because we are lazy. :-) + +## What's new +- **[Oct 2022]** New paper "[On-Device Domain Generalization](https://arxiv.org/abs/2209.07521)" is out! Code, models and datasets: https://github.com/KaiyangZhou/on-device-dg. + +
+ More + +- **[Jun 2022]** `v0.6.0`: Make `cfg.TRAINER.METHOD_NAME` consistent with the method class name. +- **[Jun 2022]** A new domain adaptation method [CDAC (CVPR'21)](https://openaccess.thecvf.com/content/CVPR2021/papers/Li_Cross-Domain_Adaptive_Clustering_for_Semi-Supervised_Domain_Adaptation_CVPR_2021_paper.pdf) is added by [Shreejal Trivedi](https://github.com/shreejalt). See [here](https://github.com/KaiyangZhou/Dassl.pytorch/pull/44) for more details. +- **[Jun 2022]** Adds three datasets from the [WILDS](https://wilds.stanford.edu/) benchmark: iWildCam, FMoW and Camelyon17. See [here](https://github.com/KaiyangZhou/Dassl.pytorch/commit/7f7eab8e22f6e176b97a539100eca12d6a403909) for more details. +- **[May 2022]** A new domain generalization method [DDG](https://arxiv.org/abs/2205.13913) developed by [Zhishu Sun](https://github.com/siaimes) and to appear at IJCAI'22 is added to this repo. See [here](https://github.com/MetaVisionLab/DDG) for more details. +- **[Mar 2022]** A new domain generalization method [EFDM](https://arxiv.org/abs/2203.07740) developed by [Yabin Zhang (PolyU)](https://ybzh.github.io/) and to appear at CVPR'22 is added to this repo. See [here](https://github.com/KaiyangZhou/Dassl.pytorch/pull/36) for more details. +- **[Feb 2022]** In case you don't know, a class in the painting domain of DomainNet (the official splits) only has test images (no training images), which could affect performance. See section 4.a in our [paper](https://arxiv.org/abs/2003.07325) for more details. +- **[Oct 2021]** `v0.5.0`: **Important changes** made to `transforms.py`. 1) `center_crop` becomes a default transform in testing (applied after resizing the smaller edge to a certain size to keep the image aspect ratio). 2) For training, `Resize(cfg.INPUT.SIZE)` is deactivated when `random_crop` or `random_resized_crop` is used. These changes won't make any difference to the training transforms used in existing config files, nor to the testing transforms unless the raw images are not squared (the only difference is that now the image aspect ratio is respected). +- **[Oct 2021]** `v0.4.3`: Copy the attributes in `self.dm` (data manager) to `SimpleTrainer` and make `self.dm` optional, which means from now on, you can build data loaders from any source you like rather than being forced to use `DataManager`. +- **[Sep 2021]** `v0.4.2`: An important update is to set `drop_last=is_train and len(data_source)>=batch_size` when constructing a data loader to avoid 0-length. + +
+ +## Overview + +Dassl has implemented the following methods: + +- Single-source domain adaptation + - [Cross Domain Adaptive Clustering for Semi Supervised Domain Adaptation (CVPR'21)](https://arxiv.org/pdf/2104.09415.pdf) [[dassl/engine/da/cdac.py](dassl/engine/da/cdac.py)] + - [Semi-supervised Domain Adaptation via Minimax Entropy (ICCV'19)](https://arxiv.org/abs/1904.06487) [[dassl/engine/da/mme.py](dassl/engine/da/mme.py)] + - [Maximum Classifier Discrepancy for Unsupervised Domain Adaptation (CVPR'18)](https://arxiv.org/abs/1712.02560) [[dassl/engine/da/mcd.py](dassl/engine/da/mcd.py)] + - [Self-ensembling for visual domain adaptation (ICLR'18)](https://arxiv.org/abs/1706.05208) [[dassl/engine/da/self_ensembling.py](dassl/engine/da/self_ensembling.py)] + - [Revisiting Batch Normalization For Practical Domain Adaptation (ICLR-W'17)](https://arxiv.org/abs/1603.04779) [[dassl/engine/da/adabn.py](dassl/engine/da/adabn.py)] + - [Adversarial Discriminative Domain Adaptation (CVPR'17)](https://arxiv.org/abs/1702.05464) [[dassl/engine/da/adda.py](dassl/engine/da/adda.py)] + - [Domain-Adversarial Training of Neural Networks (JMLR'16)](https://arxiv.org/abs/1505.07818) [[dassl/engine/da/dann.py](dassl/engine/da/dann.py)] + +- Multi-source domain adaptation + - [Domain Adaptive Ensemble Learning](https://arxiv.org/abs/2003.07325) [[dassl/engine/da/dael.py](dassl/engine/da/dael.py)] + - [Moment Matching for Multi-Source Domain Adaptation (ICCV'19)](https://arxiv.org/abs/1812.01754) [[dassl/engine/da/m3sda.py](dassl/engine/da/m3sda.py)] + +- Domain generalization + - [Dynamic Domain Generalization (IJCAI'22)](https://arxiv.org/abs/2205.13913) [[dassl/modeling/backbone/resnet_dynamic.py](dassl/modeling/backbone/resnet_dynamic.py)] [[dassl/engine/dg/domain_mix.py](dassl/engine/dg/domain_mix.py)] + - [Exact Feature Distribution Matching for Arbitrary Style Transfer and Domain Generalization (CVPR'22)](https://arxiv.org/abs/2203.07740) [[dassl/modeling/ops/efdmix.py](dassl/modeling/ops/efdmix.py)] + - [Domain Generalization with MixStyle (ICLR'21)](https://openreview.net/forum?id=6xHJ37MVxxp) [[dassl/modeling/ops/mixstyle.py](dassl/modeling/ops/mixstyle.py)] + - [Deep Domain-Adversarial Image Generation for Domain Generalisation (AAAI'20)](https://arxiv.org/abs/2003.06054) [[dassl/engine/dg/ddaig.py](dassl/engine/dg/ddaig.py)] + - [Generalizing Across Domains via Cross-Gradient Training (ICLR'18)](https://arxiv.org/abs/1804.10745) [[dassl/engine/dg/crossgrad.py](dassl/engine/dg/crossgrad.py)] + +- Semi-supervised learning + - [FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence](https://arxiv.org/abs/2001.07685) [[dassl/engine/ssl/fixmatch.py](dassl/engine/ssl/fixmatch.py)] + - [MixMatch: A Holistic Approach to Semi-Supervised Learning (NeurIPS'19)](https://arxiv.org/abs/1905.02249) [[dassl/engine/ssl/mixmatch.py](dassl/engine/ssl/mixmatch.py)] + - [Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results (NeurIPS'17)](https://arxiv.org/abs/1703.01780) [[dassl/engine/ssl/mean_teacher.py](dassl/engine/ssl/mean_teacher.py)] + - [Semi-supervised Learning by Entropy Minimization (NeurIPS'04)](http://papers.nips.cc/paper/2740-semi-supervised-learning-by-entropy-minimization.pdf) [[dassl/engine/ssl/entmin.py](dassl/engine/ssl/entmin.py)] + +*Feel free to make a
[PR](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork) to add your methods here to make it easier for others to benchmark!* + +Dassl supports the following datasets: + +- Domain adaptation + - [Office-31](https://scalable.mpi-inf.mpg.de/files/2013/04/saenko_eccv_2010.pdf) + - [Office-Home](http://hemanthdv.org/OfficeHome-Dataset/) + - [VisDA17](http://ai.bu.edu/visda-2017/) + - [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html)-[STL10](https://cs.stanford.edu/~acoates/stl10/) + - [Digit-5](https://github.com/VisionLearningGroup/VisionLearningGroup.github.io/tree/master/M3SDA/code_MSDA_digit#digit-five-download) + - [DomainNet](http://ai.bu.edu/M3SDA/) + - [miniDomainNet](https://arxiv.org/abs/2003.07325) + +- Domain generalization + - [PACS](https://arxiv.org/abs/1710.03077) + - [VLCS](https://people.csail.mit.edu/torralba/publications/datasets_cvpr11.pdf) + - [Office-Home](http://hemanthdv.org/OfficeHome-Dataset/) + - [Digits-DG](https://arxiv.org/abs/2003.06054) + - [Digit-Single](https://arxiv.org/abs/1805.12018) + - [CIFAR-10-C](https://arxiv.org/abs/1807.01697) + - [CIFAR-100-C](https://arxiv.org/abs/1807.01697) + - [iWildCam-WILDS](https://wilds.stanford.edu/datasets/#iwildcam) + - [Camelyon17-WILDS](https://wilds.stanford.edu/datasets/#camelyon17) + - [FMoW-WILDS](https://wilds.stanford.edu/datasets/#fmow) + +- Semi-supervised learning + - [CIFAR10/100](https://www.cs.toronto.edu/~kriz/cifar.html.) + - [SVHN](http://ufldl.stanford.edu/housenumbers/) + - [STL10](https://cs.stanford.edu/~acoates/stl10/) + +## Get started + +### Installation + +Make sure [conda](https://www.anaconda.com/distribution/) is installed properly. + +```bash +# Clone this repo +git clone https://github.com/KaiyangZhou/Dassl.pytorch.git +cd Dassl.pytorch/ + +# Create a conda environment +conda create -y -n dassl python=3.8 + +# Activate the environment +conda activate dassl + +# Install torch (requires version >= 1.8.1) and torchvision +# Please refer to https://pytorch.org/ if you need a different cuda version +conda install pytorch torchvision cudatoolkit=10.2 -c pytorch + +# Install dependencies +pip install -r requirements.txt + +# Install this library (no need to re-build if the source code is modified) +python setup.py develop +``` + +Follow the instructions in [DATASETS.md](./DATASETS.md) to preprocess the datasets. + +### Training + +The main interface is implemented in `tools/train.py`, which basically does + +1. initialize the config with `cfg = setup_cfg(args)` where `args` contains the command-line input (see `tools/train.py` for the list of input arguments); +2. instantiate a `trainer` with `build_trainer(cfg)` which loads the dataset and builds a deep neural network model; +3. call `trainer.train()` for training and evaluating the model. + +Below we provide an example for training a source-only baseline on the popular domain adaptation dataset, Office-31, + +```bash +CUDA_VISIBLE_DEVICES=0 python tools/train.py \ +--root $DATA \ +--trainer SourceOnly \ +--source-domains amazon \ +--target-domains webcam \ +--dataset-config-file configs/datasets/da/office31.yaml \ +--config-file configs/trainers/da/source_only/office31.yaml \ +--output-dir output/source_only_office31 +``` + +`$DATA` denotes the location where datasets are installed. `--dataset-config-file` loads the common setting for the dataset (Office-31 in this case) such as image size and model architecture. 
`--config-file` loads the algorithm-specific setting such as hyper-parameters and optimization parameters. + +To use multiple sources, namely the multi-source domain adaptation task, one just needs to add more sources to `--source-domains`. For instance, to train a source-only baseline on miniDomainNet, one can do + +```bash +CUDA_VISIBLE_DEVICES=0 python tools/train.py \ +--root $DATA \ +--trainer SourceOnly \ +--source-domains clipart painting real \ +--target-domains sketch \ +--dataset-config-file configs/datasets/da/mini_domainnet.yaml \ +--config-file configs/trainers/da/source_only/mini_domainnet.yaml \ +--output-dir output/source_only_minidn +``` + +After the training finishes, the model weights will be saved under the specified output directory, along with a log file and a tensorboard file for visualization. + +To print out the results saved in the log file (so you do not need to exhaustively go through all log files and calculate the mean/std by yourself), you can use `tools/parse_test_res.py`. The instructions can be found in the code. + +For other trainers such as `MCD`, you can set `--trainer MCD` while keeping the config file unchanged, i.e. using the same training parameters as `SourceOnly` (in the simplest case). To modify the hyper-parameters in MCD, like `N_STEP_F` (number of steps to update the feature extractor), you can append `TRAINER.MCD.N_STEP_F 4` to the existing input arguments (otherwise the default value will be used). Alternatively, you can create a new `.yaml` config file to store your custom setting. See [here](https://github.com/KaiyangZhou/Dassl.pytorch/blob/master/dassl/config/defaults.py#L176) for a complete list of algorithm-specific hyper-parameters. + +### Test +Model testing can be done by using `--eval-only`, which asks the code to run `trainer.test()`. You also need to provide the trained model and specify which model file (i.e. saved at which epoch) to use. For example, to use `model.pth.tar-20` saved at `output/source_only_office31/model`, you can do + +```bash +CUDA_VISIBLE_DEVICES=0 python tools/train.py \ +--root $DATA \ +--trainer SourceOnly \ +--source-domains amazon \ +--target-domains webcam \ +--dataset-config-file configs/datasets/da/office31.yaml \ +--config-file configs/trainers/da/source_only/office31.yaml \ +--output-dir output/source_only_office31_test \ +--eval-only \ +--model-dir output/source_only_office31 \ +--load-epoch 20 +``` + +Note that `--model-dir` takes as input the directory path which was specified in `--output-dir` in the training stage. + +### Write a new trainer +A good practice is to go through `dassl/engine/trainer.py` to get familiar with the base trainer classes, which provide generic functions and training loops. To write a trainer class for domain adaptation or semi-supervised learning, the new class can subclass `TrainerXU`. For domain generalization, the new class can subclass `TrainerX`. In particular, `TrainerXU` and `TrainerX` mainly differ in whether they use a data loader for unlabeled data. With the base classes, a new trainer may only need to implement the `forward_backward()` method, which performs loss computation and model update. See `dassl/engine/da/source_only.py` for an example.
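To make the recipe above concrete, here is a minimal, hypothetical sketch of such a trainer (not part of this patch). The class name `MyTrainer` and the plain cross-entropy loss are illustrative only; the sketch assumes the base-class helpers referenced above (`parse_batch_train`, `model_backward_and_update`, `self.model`, `update_lr`) behave as provided by `dassl/engine/trainer.py`.

```python
import torch.nn.functional as F

from dassl.engine import TRAINER_REGISTRY, TrainerX
from dassl.metrics import compute_accuracy


@TRAINER_REGISTRY.register()
class MyTrainer(TrainerX):
    """Hypothetical example: cross-entropy training on labeled data only."""

    def forward_backward(self, batch):
        # The base class is assumed to parse the batch and move tensors to the device
        input, label, domain = self.parse_batch_train(batch)
        output = self.model(input)  # self.model is built by the base trainer
        loss = F.cross_entropy(output, label)
        self.model_backward_and_update(loss)  # zero_grad + backward + optimizer step

        loss_summary = {
            "loss": loss.item(),
            "acc": compute_accuracy(output, label)[0].item(),
        }
        # Step the learning-rate scheduler at the end of each epoch
        if (self.batch_idx + 1) == self.num_batches:
            self.update_lr()
        return loss_summary
```

Once such a class is importable by the engine package, passing `--trainer MyTrainer` should select it, in the same way `--trainer SourceOnly` is selected above.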
+ +### Add a new backbone/head/network +`backbone` corresponds to a convolutional neural network model which performs feature extraction. `head` (which is an optional module) is mounted on top of `backbone` for further processing, which can be, for example, an MLP. `backbone` and `head` are basic building blocks for constructing a `SimpleNet()` (see `dassl/engine/trainer.py`) which serves as the primary model for a task. `network` contains custom neural network models, such as an image generator. + +To add a new module, namely a backbone/head/network, you need to first register the module using the corresponding `registry`, i.e. `BACKBONE_REGISTRY` for `backbone`, `HEAD_REGISTRY` for `head` and `NETWORK_REGISTRY` for `network`. Note that for a new `backbone`, we require the model to subclass `Backbone` as defined in `dassl/modeling/backbone/backbone.py` and specify the `self._out_features` attribute. + +We provide an example below of how to add a new `backbone`. +```python +from dassl.modeling import Backbone, BACKBONE_REGISTRY + +class MyBackbone(Backbone): + + def __init__(self): + super().__init__() + # Create layers + self.conv = ... + + self._out_features = 2048 + + def forward(self, x): + # Extract and return features + +@BACKBONE_REGISTRY.register() +def my_backbone(**kwargs): + return MyBackbone() +``` +Then, you can set `MODEL.BACKBONE.NAME` to `my_backbone` to use your own architecture. For more details, please refer to the source code in `dassl/modeling`. + +### Add a dataset +An example code structure is shown below. Make sure you subclass `DatasetBase` and register the dataset with `@DATASET_REGISTRY.register()`. All you need is to load `train_x`, `train_u` (optional), `val` (optional) and `test`, among which `train_u` and `val` could be `None` or simply ignored. Each of these variables contains a list of `Datum` objects. A `Datum` object (implemented [here](https://github.com/KaiyangZhou/Dassl.pytorch/blob/master/dassl/data/datasets/base_dataset.py#L12)) contains information for a single image, like `impath` (string) and `label` (int). + +```python +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase + +@DATASET_REGISTRY.register() +class NewDataset(DatasetBase): + + dataset_dir = '' + + def __init__(self, cfg): + + train_x = ... + train_u = ... # optional, can be None + val = ... # optional, can be None + test = ... + + super().__init__(train_x=train_x, train_u=train_u, val=val, test=test) +``` + +We suggest you take a look at the datasets code in some projects like [this](https://github.com/KaiyangZhou/CoOp), which is built on top of Dassl. + +## Relevant Research + +Here we would like to share our research relevant to Dassl.
+ +- [On-Device Domain Generalization](https://arxiv.org/abs/2209.07521) +- [Domain Generalization: A Survey](https://arxiv.org/abs/2103.02503) (TPAMI 2022) +- [Domain Adaptive Ensemble Learning](https://arxiv.org/abs/2003.07325) (TIP 2021) +- [MixStyle Neural Networks for Domain Generalization and Adaptation](https://arxiv.org/abs/2107.02053) +- [Semi-Supervised Domain Generalization with Stochastic StyleMatch](https://arxiv.org/abs/2106.00592) +- [Domain Generalization with MixStyle](https://openreview.net/forum?id=6xHJ37MVxxp) (ICLR 2021) +- [Learning to Generate Novel Domains for Domain Generalization](https://arxiv.org/abs/2007.03304) (ECCV 2020) +- [Deep Domain-Adversarial Image Generation for Domain Generalisation](https://arxiv.org/abs/2003.06054) (AAAI 2020) + +## Citation + +If you find this code useful to your research, please give credit to the following paper + +``` +@article{zhou2022domain, + title={Domain generalization: A survey}, + author={Zhou, Kaiyang and Liu, Ziwei and Qiao, Yu and Xiang, Tao and Loy, Chen Change}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + year={2022}, + publisher={IEEE} +} + +@article{zhou2021domain, + title={Domain adaptive ensemble learning}, + author={Zhou, Kaiyang and Yang, Yongxin and Qiao, Yu and Xiang, Tao}, + journal={IEEE Transactions on Image Processing}, + volume={30}, + pages={8008--8018}, + year={2021}, + publisher={IEEE} +} +``` diff --git a/Dassl.pytorch/configs/README.md b/Dassl.pytorch/configs/README.md new file mode 100644 index 0000000000000000000000000000000000000000..18b90a466aeeb09aeca095f1fec30c1ef3b74a30 --- /dev/null +++ b/Dassl.pytorch/configs/README.md @@ -0,0 +1 @@ +The `datasets/` folder contains dataset-specific config files which define the standard protocols (e.g., image size, data augmentation, network architecture) used by most papers. The `trainers/` folder contains method-specific config files which define optimization algorithms (e.g., optimizer, epoch) and hyperparameter settings. 
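As a rough illustration of how these two kinds of config files compose at runtime, the sketch below merges a dataset config and a trainer config on top of the yacs-based defaults returned by `get_cfg_default()` (defined in `dassl/config/__init__.py`, added later in this patch). The paths and overrides are examples only, and the exact merge order used by `tools/train.py` may differ.

```python
from dassl.config import get_cfg_default

# Illustrative sketch (not part of this patch): compose the default config with a
# dataset config, a trainer config, and command-line style overrides.
cfg = get_cfg_default()
cfg.merge_from_file("configs/datasets/da/office31.yaml")               # dataset protocol
cfg.merge_from_file("configs/trainers/da/source_only/office31.yaml")   # optimization setting
cfg.merge_from_list(["OPTIM.LR", 0.001, "OUTPUT_DIR", "output/demo"])  # extra overrides
cfg.freeze()

print(cfg.DATASET.NAME, cfg.MODEL.BACKBONE.NAME, cfg.OPTIM.MAX_EPOCH)
```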
diff --git a/Dassl.pytorch/configs/datasets/da/cifar_stl.yaml b/Dassl.pytorch/configs/datasets/da/cifar_stl.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52c086faf709de3b8996afc09d3dcce02516f2f7 --- /dev/null +++ b/Dassl.pytorch/configs/datasets/da/cifar_stl.yaml @@ -0,0 +1,7 @@ +INPUT: + SIZE: (32, 32) + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + +DATASET: + NAME: "CIFARSTL" \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/da/digit5.yaml b/Dassl.pytorch/configs/datasets/da/digit5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f754d643be265762b6dddb33b11ce87acc94d29c --- /dev/null +++ b/Dassl.pytorch/configs/datasets/da/digit5.yaml @@ -0,0 +1,12 @@ +INPUT: + SIZE: (32, 32) + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + TRANSFORMS: ["normalize"] + +DATASET: + NAME: "Digit5" + +MODEL: + BACKBONE: + NAME: "cnn_digit5_m3sda" \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/da/domainnet.yaml b/Dassl.pytorch/configs/datasets/da/domainnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..075f92323970962f05fa950578b103d9b5154bbe --- /dev/null +++ b/Dassl.pytorch/configs/datasets/da/domainnet.yaml @@ -0,0 +1,10 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_flip", "random_translation", "normalize"] + +DATASET: + NAME: "DomainNet" + +MODEL: + BACKBONE: + NAME: "resnet101" \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/da/mini_domainnet.yaml b/Dassl.pytorch/configs/datasets/da/mini_domainnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cfb34d8a3f431e86dab223780b5e675b8d797ee0 --- /dev/null +++ b/Dassl.pytorch/configs/datasets/da/mini_domainnet.yaml @@ -0,0 +1,10 @@ +INPUT: + SIZE: (96, 96) + TRANSFORMS: ["random_flip", "random_translation", "normalize"] + +DATASET: + NAME: "miniDomainNet" + +MODEL: + BACKBONE: + NAME: "resnet18" \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/da/office31.yaml b/Dassl.pytorch/configs/datasets/da/office31.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77cca035c1e6f1a085bb8cb6043935dcf3a4b6ea --- /dev/null +++ b/Dassl.pytorch/configs/datasets/da/office31.yaml @@ -0,0 +1,14 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_flip", "random_translation", "normalize"] + +DATASET: + NAME: "Office31" + +MODEL: + BACKBONE: + NAME: "resnet50" + HEAD: + NAME: "mlp" + HIDDEN_LAYERS: [256] + DROPOUT: 0. 
\ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/da/office_home.yaml b/Dassl.pytorch/configs/datasets/da/office_home.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e181fda48aedd9b968607f9a9f973f2b450a1ff --- /dev/null +++ b/Dassl.pytorch/configs/datasets/da/office_home.yaml @@ -0,0 +1,5 @@ +INPUT: + SIZE: (224, 224) + +DATASET: + NAME: "OfficeHome" \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/da/visda17.yaml b/Dassl.pytorch/configs/datasets/da/visda17.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d54f2f63861c2539c81bb20215101c981103104e --- /dev/null +++ b/Dassl.pytorch/configs/datasets/da/visda17.yaml @@ -0,0 +1,13 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_flip", "center_crop", "normalize"] + +DATASET: + NAME: "VisDA17" + +MODEL: + BACKBONE: + NAME: "resnet101" + +TEST: + PER_CLASS_RESULT: True \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/dg/camelyon17.yaml b/Dassl.pytorch/configs/datasets/dg/camelyon17.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11a2c4d7019c2e8dcb4d2e297ae8c12767e27354 --- /dev/null +++ b/Dassl.pytorch/configs/datasets/dg/camelyon17.yaml @@ -0,0 +1,6 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +DATASET: + NAME: "Camelyon17" diff --git a/Dassl.pytorch/configs/datasets/dg/cifar100_c.yaml b/Dassl.pytorch/configs/datasets/dg/cifar100_c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4b7f9171d972b49aa4fa9ae9d6dba61edf982f6 --- /dev/null +++ b/Dassl.pytorch/configs/datasets/dg/cifar100_c.yaml @@ -0,0 +1,14 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["random_flip", "random_crop", "normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + +DATASET: + NAME: "CIFAR100C" + CIFAR_C_TYPE: "fog" + CIFAR_C_LEVEL: 5 + +MODEL: + BACKBONE: + NAME: "wide_resnet_16_4" \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/dg/cifar10_c.yaml b/Dassl.pytorch/configs/datasets/dg/cifar10_c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec5702ed240580b980d4fa7dce8e19669b5798ad --- /dev/null +++ b/Dassl.pytorch/configs/datasets/dg/cifar10_c.yaml @@ -0,0 +1,14 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["random_flip", "random_crop", "normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + +DATASET: + NAME: "CIFAR10C" + CIFAR_C_TYPE: "fog" + CIFAR_C_LEVEL: 5 + +MODEL: + BACKBONE: + NAME: "wide_resnet_16_4" \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/dg/digit_single.yaml b/Dassl.pytorch/configs/datasets/dg/digit_single.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6bacbb2457b9b2b8174d35aaa07b55427325096 --- /dev/null +++ b/Dassl.pytorch/configs/datasets/dg/digit_single.yaml @@ -0,0 +1,12 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + +DATASET: + NAME: "DigitSingle" + +MODEL: + BACKBONE: + NAME: "cnn_digitsingle" \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/dg/digits_dg.yaml b/Dassl.pytorch/configs/datasets/dg/digits_dg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca25e21326fa613299ed24210575c0c6aec51fdf --- /dev/null +++ b/Dassl.pytorch/configs/datasets/dg/digits_dg.yaml @@ -0,0 +1,12 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] 
+ +DATASET: + NAME: "DigitsDG" + +MODEL: + BACKBONE: + NAME: "cnn_digitsdg" \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/dg/fmow.yaml b/Dassl.pytorch/configs/datasets/dg/fmow.yaml new file mode 100644 index 0000000000000000000000000000000000000000..825ee809c32b73206f4620f0a81cc6e5f357566f --- /dev/null +++ b/Dassl.pytorch/configs/datasets/dg/fmow.yaml @@ -0,0 +1,6 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +DATASET: + NAME: "FMoW" diff --git a/Dassl.pytorch/configs/datasets/dg/iwildcam.yaml b/Dassl.pytorch/configs/datasets/dg/iwildcam.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8aa2eb522065b9abaf16ea4ed30e39645ae3be6 --- /dev/null +++ b/Dassl.pytorch/configs/datasets/dg/iwildcam.yaml @@ -0,0 +1,6 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +DATASET: + NAME: "IWildCam" diff --git a/Dassl.pytorch/configs/datasets/dg/office_home_dg.yaml b/Dassl.pytorch/configs/datasets/dg/office_home_dg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0835973c86ac056dcf0fbc3beb95f110014a0919 --- /dev/null +++ b/Dassl.pytorch/configs/datasets/dg/office_home_dg.yaml @@ -0,0 +1,11 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_flip", "random_translation", "normalize"] + +DATASET: + NAME: "OfficeHomeDG" + +MODEL: + BACKBONE: + NAME: "resnet18" + PRETRAINED: True \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/dg/pacs.yaml b/Dassl.pytorch/configs/datasets/dg/pacs.yaml new file mode 100644 index 0000000000000000000000000000000000000000..682ab1c807a5694ec3d42e9da5f7565d90b274fb --- /dev/null +++ b/Dassl.pytorch/configs/datasets/dg/pacs.yaml @@ -0,0 +1,11 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_flip", "random_translation", "normalize"] + +DATASET: + NAME: "PACS" + +MODEL: + BACKBONE: + NAME: "resnet18" + PRETRAINED: True \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/dg/vlcs.yaml b/Dassl.pytorch/configs/datasets/dg/vlcs.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c8804cf73fdb44b0660692149b529ed58c14c2e --- /dev/null +++ b/Dassl.pytorch/configs/datasets/dg/vlcs.yaml @@ -0,0 +1,11 @@ +INPUT: + SIZE: (224, 224) + TRANSFORMS: ["random_flip", "random_translation", "normalize"] + +DATASET: + NAME: "VLCS" + +MODEL: + BACKBONE: + NAME: "resnet18" + PRETRAINED: True \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/ssl/cifar10.yaml b/Dassl.pytorch/configs/datasets/ssl/cifar10.yaml new file mode 100644 index 0000000000000000000000000000000000000000..63b6a1df2eae0ef5cb454a8eb51e143ac3184430 --- /dev/null +++ b/Dassl.pytorch/configs/datasets/ssl/cifar10.yaml @@ -0,0 +1,14 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["random_flip", "random_crop", "normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + +DATASET: + NAME: "CIFAR10" + NUM_LABELED: 4000 + VAL_PERCENT: 0. 
+ +MODEL: + BACKBONE: + NAME: "wide_resnet_28_2" \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/ssl/cifar100.yaml b/Dassl.pytorch/configs/datasets/ssl/cifar100.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6230a881904418885bc9755f6fdc71582465b902 --- /dev/null +++ b/Dassl.pytorch/configs/datasets/ssl/cifar100.yaml @@ -0,0 +1,15 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["random_flip", "random_crop", "normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + CROP_PADDING: 4 + +DATASET: + NAME: "CIFAR100" + NUM_LABELED: 10000 + VAL_PERCENT: 0. + +MODEL: + BACKBONE: + NAME: "wide_resnet_28_2" \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/ssl/stl10.yaml b/Dassl.pytorch/configs/datasets/ssl/stl10.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b11df1263da3b5212affc132998aae76308e86a --- /dev/null +++ b/Dassl.pytorch/configs/datasets/ssl/stl10.yaml @@ -0,0 +1,14 @@ +INPUT: + SIZE: (96, 96) + TRANSFORMS: ["random_flip", "random_crop", "normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + CROP_PADDING: 4 + +DATASET: + NAME: "STL10" + STL10_FOLD: 0 + +MODEL: + BACKBONE: + NAME: "wide_resnet_28_2" \ No newline at end of file diff --git a/Dassl.pytorch/configs/datasets/ssl/svhn.yaml b/Dassl.pytorch/configs/datasets/ssl/svhn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd3b527b4693d3f61126f2425caef3ffcf54d725 --- /dev/null +++ b/Dassl.pytorch/configs/datasets/ssl/svhn.yaml @@ -0,0 +1,15 @@ +INPUT: + SIZE: (32, 32) + TRANSFORMS: ["random_crop", "normalize"] + PIXEL_MEAN: [0.5, 0.5, 0.5] + PIXEL_STD: [0.5, 0.5, 0.5] + CROP_PADDING: 4 + +DATASET: + NAME: "SVHN" + NUM_LABELED: 1000 + VAL_PERCENT: 0. 
+ +MODEL: + BACKBONE: + NAME: "wide_resnet_28_2" \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/cdac/digit5.yaml b/Dassl.pytorch/configs/trainers/da/cdac/digit5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04ba3467d0b65d39c9b0357ec45ecf506fc745cf --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/cdac/digit5.yaml @@ -0,0 +1,20 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomSampler" + BATCH_SIZE: 64 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 192 + TEST: + BATCH_SIZE: 256 + K_TRANSFORMS: 2 + +OPTIM: + NAME: "sgd" + LR: 0.001 + MAX_EPOCH: 90 + RAMPUP_ITRS: 10000 + +TRAINER: + CDAC: + STRONG_TRANSFORMS: ["randaugment", "normalize"] \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/cdac/domainnet.yaml b/Dassl.pytorch/configs/trainers/da/cdac/domainnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5fd5593492bfdf16d3f838eb1607b4e6a397f7f --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/cdac/domainnet.yaml @@ -0,0 +1,20 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 30 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 6 + TEST: + BATCH_SIZE: 30 + K_TRANSFORMS: 2 + +OPTIM: + NAME: "sgd" + LR: 0.001 + MAX_EPOCH: 90 + RAMPUP_ITRS: 10000 + +TRAINER: + CDAC: + STRONG_TRANSFORMS: ["randaugment", "normalize"] \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/cdac/mini_domainnet.yaml b/Dassl.pytorch/configs/trainers/da/cdac/mini_domainnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb4f9c122c9a66496fad25d899bc90cc26c89eeb --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/cdac/mini_domainnet.yaml @@ -0,0 +1,21 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 64 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 192 + TEST: + BATCH_SIZE: 200 + K_TRANSFORMS: 2 + +OPTIM: + NAME: "sgd" + LR: 0.001 + MAX_EPOCH: 60 + RAMPUP_ITRS: 10000 + LR_SCHEDULER: "cosine" + +TRAINER: + CDAC: + STRONG_TRANSFORMS: ["randaugment", "normalize"] \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/dael/digit5.yaml b/Dassl.pytorch/configs/trainers/da/dael/digit5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d83bfe42ee6208112c5fd78c80039ff3b55ba999 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/dael/digit5.yaml @@ -0,0 +1,20 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 256 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 64 + TEST: + BATCH_SIZE: 256 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [30] + MAX_EPOCH: 30 + LR_SCHEDULER: "cosine" + +TRAINER: + DAEL: + STRONG_TRANSFORMS: ["randaugment2", "normalize"] \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/dael/domainnet.yaml b/Dassl.pytorch/configs/trainers/da/dael/domainnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc7cd211efd249663cbc17713055a514745aea6d --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/dael/domainnet.yaml @@ -0,0 +1,19 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 30 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 6 + TEST: + BATCH_SIZE: 30 + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 40 + LR_SCHEDULER: "cosine" + +TRAINER: + DAEL: + STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/dael/mini_domainnet.yaml b/Dassl.pytorch/configs/trainers/da/dael/mini_domainnet.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..708ddcba9e679cedc606b4c1f8ab593718288dbc --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/dael/mini_domainnet.yaml @@ -0,0 +1,19 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 192 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 64 + TEST: + BATCH_SIZE: 200 + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 60 + LR_SCHEDULER: "cosine" + +TRAINER: + DAEL: + STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/m3sda/digit5.yaml b/Dassl.pytorch/configs/trainers/da/m3sda/digit5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a70887b0c74cd2fee9d60c9324e1b8066d700bf0 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/m3sda/digit5.yaml @@ -0,0 +1,16 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 256 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 64 + TEST: + BATCH_SIZE: 256 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [30] + MAX_EPOCH: 30 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/m3sda/domainnet.yaml b/Dassl.pytorch/configs/trainers/da/m3sda/domainnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5abaa12a79cdd516a8cf99efbcc9dc6a8c557fbe --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/m3sda/domainnet.yaml @@ -0,0 +1,15 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 30 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 6 + TEST: + BATCH_SIZE: 30 + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 40 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/m3sda/mini_domainnet.yaml b/Dassl.pytorch/configs/trainers/da/m3sda/mini_domainnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6edf3e3cbde7f3b30a27f4989d2ac100bea278e7 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/m3sda/mini_domainnet.yaml @@ -0,0 +1,15 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 192 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 64 + TEST: + BATCH_SIZE: 200 + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 60 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/source_only/digit5.yaml b/Dassl.pytorch/configs/trainers/da/source_only/digit5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64ce348e5b3ce64209193755dae9f360f71a9f8d --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/source_only/digit5.yaml @@ -0,0 +1,12 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 256 + TEST: + BATCH_SIZE: 256 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [30] + MAX_EPOCH: 30 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/source_only/mini_domainnet.yaml b/Dassl.pytorch/configs/trainers/da/source_only/mini_domainnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd8471eb2a79dbf70e0b5c606a82d042c0148f70 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/source_only/mini_domainnet.yaml @@ -0,0 +1,11 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 128 + TEST: + BATCH_SIZE: 128 + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 60 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/source_only/office31.yaml b/Dassl.pytorch/configs/trainers/da/source_only/office31.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..8fb73ee1ba1ec366cf367fa974919a7c4245e163 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/source_only/office31.yaml @@ -0,0 +1,11 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 32 + +OPTIM: + NAME: "sgd" + LR: 0.002 + STEPSIZE: [20] + MAX_EPOCH: 20 \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/da/source_only/visda17.yaml b/Dassl.pytorch/configs/trainers/da/source_only/visda17.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c25fb097ef7b7736a24703cb39dd5d894d55594 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/da/source_only/visda17.yaml @@ -0,0 +1,15 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 32 + +OPTIM: + NAME: "sgd" + LR: 0.0001 + STEPSIZE: [2] + MAX_EPOCH: 2 + +TRAIN: + PRINT_FREQ: 50 + COUNT_ITER: "train_u" \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/dg/daeldg/digits_dg.yaml b/Dassl.pytorch/configs/trainers/dg/daeldg/digits_dg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..453043132b0bebac7915457a639a7e8bfad0cc13 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/dg/daeldg/digits_dg.yaml @@ -0,0 +1,16 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 120 + TEST: + BATCH_SIZE: 100 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [20] + MAX_EPOCH: 50 + +TRAINER: + DAELDG: + STRONG_TRANSFORMS: ["randaugment2", "normalize"] \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/dg/daeldg/office_home_dg.yaml b/Dassl.pytorch/configs/trainers/dg/daeldg/office_home_dg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b17f5a7e92dab5332c4e33c3123fa5d97e8dd60 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/dg/daeldg/office_home_dg.yaml @@ -0,0 +1,16 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 30 + TEST: + BATCH_SIZE: 100 + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 40 + LR_SCHEDULER: "cosine" + +TRAINER: + DAELDG: + STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/dg/daeldg/pacs.yaml b/Dassl.pytorch/configs/trainers/dg/daeldg/pacs.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b17f5a7e92dab5332c4e33c3123fa5d97e8dd60 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/dg/daeldg/pacs.yaml @@ -0,0 +1,16 @@ +DATALOADER: + TRAIN_X: + SAMPLER: "RandomDomainSampler" + BATCH_SIZE: 30 + TEST: + BATCH_SIZE: 100 + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 40 + LR_SCHEDULER: "cosine" + +TRAINER: + DAELDG: + STRONG_TRANSFORMS: ["random_flip", "cutout", "randaugment2", "normalize"] \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/dg/ddaig/digits_dg.yaml b/Dassl.pytorch/configs/trainers/dg/ddaig/digits_dg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ee803029c6acfa1f6904a48a9d93e91c8f54014 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/dg/ddaig/digits_dg.yaml @@ -0,0 +1,20 @@ +INPUT: + PIXEL_MEAN: [0., 0., 0.] + PIXEL_STD: [1., 1., 1.] 
+ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 128 + TEST: + BATCH_SIZE: 128 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [20] + MAX_EPOCH: 50 + +TRAINER: + DDAIG: + G_ARCH: "fcn_3x32_gctx" + LMDA: 0.3 \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/dg/ddaig/office_home_dg.yaml b/Dassl.pytorch/configs/trainers/dg/ddaig/office_home_dg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b55f81009379aa043eabbb7ec4c4f7c70cd0e32d --- /dev/null +++ b/Dassl.pytorch/configs/trainers/dg/ddaig/office_home_dg.yaml @@ -0,0 +1,21 @@ +INPUT: + PIXEL_MEAN: [0., 0., 0.] + PIXEL_STD: [1., 1., 1.] + +DATALOADER: + TRAIN_X: + BATCH_SIZE: 16 + TEST: + BATCH_SIZE: 16 + +OPTIM: + NAME: "sgd" + LR: 0.0005 + STEPSIZE: [20] + MAX_EPOCH: 25 + +TRAINER: + DDAIG: + G_ARCH: "fcn_3x64_gctx" + WARMUP: 3 + LMDA: 0.3 \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/dg/ddaig/pacs.yaml b/Dassl.pytorch/configs/trainers/dg/ddaig/pacs.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b55f81009379aa043eabbb7ec4c4f7c70cd0e32d --- /dev/null +++ b/Dassl.pytorch/configs/trainers/dg/ddaig/pacs.yaml @@ -0,0 +1,21 @@ +INPUT: + PIXEL_MEAN: [0., 0., 0.] + PIXEL_STD: [1., 1., 1.] + +DATALOADER: + TRAIN_X: + BATCH_SIZE: 16 + TEST: + BATCH_SIZE: 16 + +OPTIM: + NAME: "sgd" + LR: 0.0005 + STEPSIZE: [20] + MAX_EPOCH: 25 + +TRAINER: + DDAIG: + G_ARCH: "fcn_3x64_gctx" + WARMUP: 3 + LMDA: 0.3 \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/dg/vanilla/digits_dg.yaml b/Dassl.pytorch/configs/trainers/dg/vanilla/digits_dg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b73fbeac06b4a26a29fa9c41961fda146a9d154 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/dg/vanilla/digits_dg.yaml @@ -0,0 +1,15 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 128 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [20] + MAX_EPOCH: 50 + +TRAIN: + PRINT_FREQ: 20 \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/dg/vanilla/mini_domainnet.yaml b/Dassl.pytorch/configs/trainers/dg/vanilla/mini_domainnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd8471eb2a79dbf70e0b5c606a82d042c0148f70 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/dg/vanilla/mini_domainnet.yaml @@ -0,0 +1,11 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 128 + TEST: + BATCH_SIZE: 128 + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 60 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/dg/vanilla/office_home_dg.yaml b/Dassl.pytorch/configs/trainers/dg/vanilla/office_home_dg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43f621425a9b2d53c0af6ff105397180144bfcb0 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/dg/vanilla/office_home_dg.yaml @@ -0,0 +1,11 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 64 + TEST: + BATCH_SIZE: 100 + +OPTIM: + NAME: "sgd" + LR: 0.001 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" \ No newline at end of file diff --git a/Dassl.pytorch/configs/trainers/dg/vanilla/pacs.yaml b/Dassl.pytorch/configs/trainers/dg/vanilla/pacs.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43f621425a9b2d53c0af6ff105397180144bfcb0 --- /dev/null +++ b/Dassl.pytorch/configs/trainers/dg/vanilla/pacs.yaml @@ -0,0 +1,11 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 64 + TEST: + BATCH_SIZE: 100 + +OPTIM: + NAME: "sgd" + LR: 0.001 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" \ No newline at end of file diff 
--git a/Dassl.pytorch/configs/trainers/ssl/fixmatch/cifar10.yaml b/Dassl.pytorch/configs/trainers/ssl/fixmatch/cifar10.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a03fc6c95d69ec7f73dafe0954b32764f4ed8c1e --- /dev/null +++ b/Dassl.pytorch/configs/trainers/ssl/fixmatch/cifar10.yaml @@ -0,0 +1,23 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 64 + TRAIN_U: + SAME_AS_X: False + BATCH_SIZE: 448 + TEST: + BATCH_SIZE: 500 + +OPTIM: + NAME: "sgd" + LR: 0.05 + STEPSIZE: [4000] + MAX_EPOCH: 4000 + LR_SCHEDULER: "cosine" + +TRAIN: + COUNT_ITER: "train_u" + PRINT_FREQ: 10 + +TRAINER: + FIXMATCH: + STRONG_TRANSFORMS: ["random_flip", "randaugment_fixmatch", "normalize", "cutout"] \ No newline at end of file diff --git a/Dassl.pytorch/dassl/.DS_Store b/Dassl.pytorch/dassl/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/Dassl.pytorch/dassl/.DS_Store differ diff --git a/Dassl.pytorch/dassl/__init__.py b/Dassl.pytorch/dassl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..225e3ca01fcf66bc0c7768d756ae145509419c6f --- /dev/null +++ b/Dassl.pytorch/dassl/__init__.py @@ -0,0 +1,18 @@ +""" +Dassl +------ +PyTorch toolbox for domain adaptation and semi-supervised learning. + +URL: https://github.com/KaiyangZhou/Dassl.pytorch + +@article{zhou2020domain, + title={Domain Adaptive Ensemble Learning}, + author={Zhou, Kaiyang and Yang, Yongxin and Qiao, Yu and Xiang, Tao}, + journal={arXiv preprint arXiv:2003.07325}, + year={2020} +} +""" + +__version__ = "0.6.3" +__author__ = "Kaiyang Zhou" +__homepage__ = "https://kaiyangzhou.github.io/" diff --git a/Dassl.pytorch/dassl/config/__init__.py b/Dassl.pytorch/dassl/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d745fbabfcee7ff8889a9650bd268f849e98b3f6 --- /dev/null +++ b/Dassl.pytorch/dassl/config/__init__.py @@ -0,0 +1,21 @@ +from .defaults import _C as cfg_default + + +def get_cfg_default(): + return cfg_default.clone() + + +def clean_cfg(cfg, trainer): + """Remove unused trainers (configs). + + Aim: Only show relevant information when calling print(cfg). + + Args: + cfg (_C): cfg instance. + trainer (str): trainer name. + """ + keys = list(cfg.TRAINER.keys()) + for key in keys: + if key == "NAME" or key == trainer.upper(): + continue + cfg.TRAINER.pop(key, None) diff --git a/Dassl.pytorch/dassl/config/defaults.py b/Dassl.pytorch/dassl/config/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..cd873e978e7108e582ffba6d478acf94b66629dc --- /dev/null +++ b/Dassl.pytorch/dassl/config/defaults.py @@ -0,0 +1,306 @@ +from yacs.config import CfgNode as CN + +########################### +# Config definition +########################### + +_C = CN() + +_C.VERSION = 1 + +# Directory to save the output files (like log.txt and model weights) +_C.OUTPUT_DIR = "./output" +# Path to a directory where the files were saved previously +_C.RESUME = "" +# Set seed to negative value to randomize everything +# Set seed to positive value to use a fixed seed +_C.SEED = -1 +_C.USE_CUDA = True +# Print detailed information +# E.g. 
trainer, dataset, and backbone +_C.VERBOSE = True + +########################### +# Input +########################### +_C.INPUT = CN() +_C.INPUT.SIZE = (224, 224) +# Mode of interpolation in resize functions +_C.INPUT.INTERPOLATION = "bilinear" +# For available choices please refer to transforms.py +_C.INPUT.TRANSFORMS = () +# If True, tfm_train and tfm_test will be None +_C.INPUT.NO_TRANSFORM = False +# Mean and std (default: ImageNet) +_C.INPUT.PIXEL_MEAN = [0.485, 0.456, 0.406] +_C.INPUT.PIXEL_STD = [0.229, 0.224, 0.225] +# Random crop +_C.INPUT.CROP_PADDING = 4 +# Random resized crop +_C.INPUT.RRCROP_SCALE = (0.08, 1.0) +# Cutout +_C.INPUT.CUTOUT_N = 1 +_C.INPUT.CUTOUT_LEN = 16 +# Gaussian noise +_C.INPUT.GN_MEAN = 0.0 +_C.INPUT.GN_STD = 0.15 +# RandomAugment +_C.INPUT.RANDAUGMENT_N = 2 +_C.INPUT.RANDAUGMENT_M = 10 +# ColorJitter (brightness, contrast, saturation, hue) +_C.INPUT.COLORJITTER_B = 0.4 +_C.INPUT.COLORJITTER_C = 0.4 +_C.INPUT.COLORJITTER_S = 0.4 +_C.INPUT.COLORJITTER_H = 0.1 +# Random gray scale's probability +_C.INPUT.RGS_P = 0.2 +# Gaussian blur +_C.INPUT.GB_P = 0.5 # propability of applying this operation +_C.INPUT.GB_K = 21 # kernel size (should be an odd number) + +########################### +# Dataset +########################### +_C.DATASET = CN() +# Directory where datasets are stored +_C.DATASET.ROOT = "" +_C.DATASET.NAME = "" +# List of source/target domains' names (strings) +# Do not apply to some datasets, which have pre-defined splits +_C.DATASET.SOURCE_DOMAINS = () +_C.DATASET.TARGET_DOMAINS = () +# Number of labeled instances in total +# Useful for the semi-supervised learning +_C.DATASET.NUM_LABELED = -1 +# Number of images per class +_C.DATASET.NUM_SHOTS = -1 +# Percentage of validation data (only used for SSL datasets) +# Set to 0 if do not want to use val data +# Using val data for hyperparameter tuning was done in Oliver et al. 2018 +_C.DATASET.VAL_PERCENT = 0.1 +# Fold index for STL-10 dataset (normal range is 0 - 9) +# Negative number means None +_C.DATASET.STL10_FOLD = -1 +# CIFAR-10/100-C's corruption type and intensity level +_C.DATASET.CIFAR_C_TYPE = "" +_C.DATASET.CIFAR_C_LEVEL = 1 +# Use all data in the unlabeled data set (e.g. 
FixMatch) +_C.DATASET.ALL_AS_UNLABELED = False + +########################### +# Dataloader +########################### +_C.DATALOADER = CN() +_C.DATALOADER.NUM_WORKERS = 4 +# Apply transformations to an image K times (during training) +_C.DATALOADER.K_TRANSFORMS = 1 +# img0 denotes image tensor without augmentation +# Useful for consistency learning +_C.DATALOADER.RETURN_IMG0 = False +# Setting for the train_x data-loader +_C.DATALOADER.TRAIN_X = CN() +_C.DATALOADER.TRAIN_X.SAMPLER = "RandomSampler" +_C.DATALOADER.TRAIN_X.BATCH_SIZE = 32 +# Parameter for RandomDomainSampler +# 0 or -1 means sampling from all domains +_C.DATALOADER.TRAIN_X.N_DOMAIN = 0 +# Parameter of RandomClassSampler +# Number of instances per class +_C.DATALOADER.TRAIN_X.N_INS = 16 + +# Setting for the train_u data-loader +_C.DATALOADER.TRAIN_U = CN() +# Set to false if you want to have unique +# data loader params for train_u +_C.DATALOADER.TRAIN_U.SAME_AS_X = True +_C.DATALOADER.TRAIN_U.SAMPLER = "RandomSampler" +_C.DATALOADER.TRAIN_U.BATCH_SIZE = 32 +_C.DATALOADER.TRAIN_U.N_DOMAIN = 0 +_C.DATALOADER.TRAIN_U.N_INS = 16 + +# Setting for the test data-loader +_C.DATALOADER.TEST = CN() +_C.DATALOADER.TEST.SAMPLER = "SequentialSampler" +_C.DATALOADER.TEST.BATCH_SIZE = 32 + +########################### +# Model +########################### +_C.MODEL = CN() +# Path to model weights (for initialization) +_C.MODEL.INIT_WEIGHTS = "" +_C.MODEL.BACKBONE = CN() +_C.MODEL.BACKBONE.NAME = "" +_C.MODEL.BACKBONE.PRETRAINED = True +# Definition of embedding layers +_C.MODEL.HEAD = CN() +# If none, do not construct embedding layers, the +# backbone's output will be passed to the classifier +_C.MODEL.HEAD.NAME = "" +# Structure of hidden layers (a list), e.g. [512, 512] +# If undefined, no embedding layer will be constructed +_C.MODEL.HEAD.HIDDEN_LAYERS = () +_C.MODEL.HEAD.ACTIVATION = "relu" +_C.MODEL.HEAD.BN = True +_C.MODEL.HEAD.DROPOUT = 0.0 + +########################### +# Optimization +########################### +_C.OPTIM = CN() +_C.OPTIM.NAME = "adam" +_C.OPTIM.LR = 0.0003 +_C.OPTIM.WEIGHT_DECAY = 5e-4 +_C.OPTIM.MOMENTUM = 0.9 +_C.OPTIM.SGD_DAMPNING = 0 +_C.OPTIM.SGD_NESTEROV = False +_C.OPTIM.RMSPROP_ALPHA = 0.99 +# The following also apply to other +# adaptive optimizers like adamw +_C.OPTIM.ADAM_BETA1 = 0.9 +_C.OPTIM.ADAM_BETA2 = 0.999 +# STAGED_LR allows different layers to have +# different lr, e.g. 
pre-trained base layers +# can be assigned a smaller lr than the new +# classification layer +_C.OPTIM.STAGED_LR = False +_C.OPTIM.NEW_LAYERS = () +_C.OPTIM.BASE_LR_MULT = 0.1 +# Learning rate scheduler +_C.OPTIM.LR_SCHEDULER = "single_step" +# -1 or 0 means the stepsize is equal to max_epoch +_C.OPTIM.STEPSIZE = (-1, ) +_C.OPTIM.GAMMA = 0.1 +_C.OPTIM.MAX_EPOCH = 10 +# Set WARMUP_EPOCH larger than 0 to activate warmup training +_C.OPTIM.WARMUP_EPOCH = -1 +# Either linear or constant +_C.OPTIM.WARMUP_TYPE = "linear" +# Constant learning rate when type=constant +_C.OPTIM.WARMUP_CONS_LR = 1e-5 +# Minimum learning rate when type=linear +_C.OPTIM.WARMUP_MIN_LR = 1e-5 +# Recount epoch for the next scheduler (last_epoch=-1) +# Otherwise last_epoch=warmup_epoch +_C.OPTIM.WARMUP_RECOUNT = True + +########################### +# Train +########################### +_C.TRAIN = CN() +# How often (epoch) to save model during training +# Set to 0 or negative value to only save the last one +_C.TRAIN.CHECKPOINT_FREQ = 0 +# How often (batch) to print training information +_C.TRAIN.PRINT_FREQ = 10 +# Use 'train_x', 'train_u' or 'smaller_one' to count +# the number of iterations in an epoch (for DA and SSL) +_C.TRAIN.COUNT_ITER = "train_x" + +########################### +# Test +########################### +_C.TEST = CN() +_C.TEST.EVALUATOR = "Classification" +_C.TEST.PER_CLASS_RESULT = False +# Compute confusion matrix, which will be saved +# to $OUTPUT_DIR/cmat.pt +_C.TEST.COMPUTE_CMAT = False +# If NO_TEST=True, no testing will be conducted +_C.TEST.NO_TEST = False +# Use test or val set for FINAL evaluation +_C.TEST.SPLIT = "test" +# Which model to test after training (last_step or best_val) +# If best_val, evaluation is done every epoch (if val data +# is unavailable, test data will be used) +_C.TEST.FINAL_MODEL = "last_step" + +########################### +# Trainer specifics +########################### +_C.TRAINER = CN() +_C.TRAINER.NAME = "" + +###### +# DA +###### +# MCD +_C.TRAINER.MCD = CN() +_C.TRAINER.MCD.N_STEP_F = 4 # number of steps to train F +# MME +_C.TRAINER.MME = CN() +_C.TRAINER.MME.LMDA = 0.1 # weight for the entropy loss +# CDAC +_C.TRAINER.CDAC = CN() +_C.TRAINER.CDAC.CLASS_LR_MULTI = 10 +_C.TRAINER.CDAC.RAMPUP_COEF = 30 +_C.TRAINER.CDAC.RAMPUP_ITRS = 1000 +_C.TRAINER.CDAC.TOPK_MATCH = 5 +_C.TRAINER.CDAC.P_THRESH = 0.95 +_C.TRAINER.CDAC.STRONG_TRANSFORMS = () +# SE (SelfEnsembling) +_C.TRAINER.SE = CN() +_C.TRAINER.SE.EMA_ALPHA = 0.999 +_C.TRAINER.SE.CONF_THRE = 0.95 +_C.TRAINER.SE.RAMPUP = 300 +# M3SDA +_C.TRAINER.M3SDA = CN() +_C.TRAINER.M3SDA.LMDA = 0.5 # weight for the moment distance loss +_C.TRAINER.M3SDA.N_STEP_F = 4 # follow MCD +# DAEL +_C.TRAINER.DAEL = CN() +_C.TRAINER.DAEL.WEIGHT_U = 0.5 # weight on the unlabeled loss +_C.TRAINER.DAEL.CONF_THRE = 0.95 # confidence threshold +_C.TRAINER.DAEL.STRONG_TRANSFORMS = () + +###### +# DG +###### +# CrossGrad +_C.TRAINER.CROSSGRAD = CN() +_C.TRAINER.CROSSGRAD.EPS_F = 1.0 # scaling parameter for D's gradients +_C.TRAINER.CROSSGRAD.EPS_D = 1.0 # scaling parameter for F's gradients +_C.TRAINER.CROSSGRAD.ALPHA_F = 0.5 # balancing weight for the label net's loss +_C.TRAINER.CROSSGRAD.ALPHA_D = 0.5 # balancing weight for the domain net's loss +# DDAIG +_C.TRAINER.DDAIG = CN() +_C.TRAINER.DDAIG.G_ARCH = "" # generator's architecture +_C.TRAINER.DDAIG.LMDA = 0.3 # perturbation weight +_C.TRAINER.DDAIG.CLAMP = False # clamp perturbation values +_C.TRAINER.DDAIG.CLAMP_MIN = -1.0 +_C.TRAINER.DDAIG.CLAMP_MAX = 1.0 +_C.TRAINER.DDAIG.WARMUP = 0 
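For orientation, here is a minimal sketch of how these defaults are consumed together with the trainer YAMLs added earlier in this diff, using `get_cfg_default`/`clean_cfg` from `dassl/config/__init__.py` plus standard yacs methods. The YAML path assumes you run from the `Dassl.pytorch` directory, and `/path/to/data` is a placeholder:

```python
from dassl.config import get_cfg_default, clean_cfg

cfg = get_cfg_default()  # a clone of the _C tree defined in this file
# Overlay one of the trainer configs added above, then fill in runtime choices.
cfg.merge_from_file("configs/trainers/dg/ddaig/pacs.yaml")
cfg.merge_from_list([
    "TRAINER.NAME", "DDAIG",
    "DATASET.NAME", "PACS",
    "DATASET.ROOT", "/path/to/data",
])
clean_cfg(cfg, "DDAIG")  # drop TRAINER.* branches belonging to other trainers
cfg.freeze()

print(cfg.OPTIM.LR)              # 0.0005, from the YAML (the default above is 0.0003)
print(cfg.TRAINER.DDAIG.G_ARCH)  # fcn_3x64_gctx
```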
+_C.TRAINER.DDAIG.ALPHA = 0.5 # balancing weight for the losses +# DAELDG (the DG version of DAEL) +_C.TRAINER.DAELDG = CN() +_C.TRAINER.DAELDG.WEIGHT_U = 0.5 # weight on the unlabeled loss +_C.TRAINER.DAELDG.CONF_THRE = 0.95 # confidence threshold +_C.TRAINER.DAELDG.STRONG_TRANSFORMS = () +# DOMAINMIX +_C.TRAINER.DOMAINMIX = CN() +_C.TRAINER.DOMAINMIX.TYPE = "crossdomain" +_C.TRAINER.DOMAINMIX.ALPHA = 1.0 +_C.TRAINER.DOMAINMIX.BETA = 1.0 + +###### +# SSL +###### +# EntMin +_C.TRAINER.ENTMIN = CN() +_C.TRAINER.ENTMIN.LMDA = 1e-3 # weight on the entropy loss +# Mean Teacher +_C.TRAINER.MEANTEACHER = CN() +_C.TRAINER.MEANTEACHER.WEIGHT_U = 1.0 # weight on the unlabeled loss +_C.TRAINER.MEANTEACHER.EMA_ALPHA = 0.999 +_C.TRAINER.MEANTEACHER.RAMPUP = 5 # epochs used to ramp up the loss_u weight +# MixMatch +_C.TRAINER.MIXMATCH = CN() +_C.TRAINER.MIXMATCH.WEIGHT_U = 100.0 # weight on the unlabeled loss +_C.TRAINER.MIXMATCH.TEMP = 2.0 # temperature for sharpening the probability +_C.TRAINER.MIXMATCH.MIXUP_BETA = 0.75 +_C.TRAINER.MIXMATCH.RAMPUP = 20000 # steps used to ramp up the loss_u weight +# FixMatch +_C.TRAINER.FIXMATCH = CN() +_C.TRAINER.FIXMATCH.WEIGHT_U = 1.0 # weight on the unlabeled loss +_C.TRAINER.FIXMATCH.CONF_THRE = 0.95 # confidence threshold +_C.TRAINER.FIXMATCH.STRONG_TRANSFORMS = () diff --git a/Dassl.pytorch/dassl/data/__init__.py b/Dassl.pytorch/dassl/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..66ca734ef1123667efc14264f9f52cafacbda70f --- /dev/null +++ b/Dassl.pytorch/dassl/data/__init__.py @@ -0,0 +1 @@ +from .data_manager import DataManager, DatasetWrapper diff --git a/Dassl.pytorch/dassl/data/data_manager.py b/Dassl.pytorch/dassl/data/data_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..c0a4b4293be033392cdddf23c6b630b998c56247 --- /dev/null +++ b/Dassl.pytorch/dassl/data/data_manager.py @@ -0,0 +1,261 @@ +import torch +import torchvision.transforms as T +from tabulate import tabulate +from torch.utils.data import Dataset as TorchDataset + +from dassl.utils import read_image + +from .datasets import build_dataset +from .samplers import build_sampler +from .transforms import INTERPOLATION_MODES, build_transform + + +def build_data_loader( + cfg, + sampler_type="SequentialSampler", + data_source=None, + batch_size=64, + n_domain=0, + n_ins=2, + tfm=None, + is_train=True, + dataset_wrapper=None +): + # Build sampler + sampler = build_sampler( + sampler_type, + cfg=cfg, + data_source=data_source, + batch_size=batch_size, + n_domain=n_domain, + n_ins=n_ins + ) + + if dataset_wrapper is None: + dataset_wrapper = DatasetWrapper + + # Build data loader + data_loader = torch.utils.data.DataLoader( + dataset_wrapper(cfg, data_source, transform=tfm, is_train=is_train), + batch_size=batch_size, + sampler=sampler, + num_workers=cfg.DATALOADER.NUM_WORKERS, + drop_last=is_train and len(data_source) >= batch_size, + pin_memory=(torch.cuda.is_available() and cfg.USE_CUDA) + ) + assert len(data_loader) > 0 + + return data_loader + + +class DataManager: + + def __init__( + self, + cfg, + custom_tfm_train=None, + custom_tfm_test=None, + dataset_wrapper=None + ): + # Load dataset + dataset = build_dataset(cfg) + + # Build transform + if custom_tfm_train is None: + tfm_train = build_transform(cfg, is_train=True) + else: + print("* Using custom transform for training") + tfm_train = custom_tfm_train + + if custom_tfm_test is None: + tfm_test = build_transform(cfg, is_train=False) + else: + print("* Using custom transform 
for testing") + tfm_test = custom_tfm_test + + # Build train_loader_x + train_loader_x = build_data_loader( + cfg, + sampler_type=cfg.DATALOADER.TRAIN_X.SAMPLER, + data_source=dataset.train_x, + batch_size=cfg.DATALOADER.TRAIN_X.BATCH_SIZE, + n_domain=cfg.DATALOADER.TRAIN_X.N_DOMAIN, + n_ins=cfg.DATALOADER.TRAIN_X.N_INS, + tfm=tfm_train, + is_train=True, + dataset_wrapper=dataset_wrapper + ) + + # Build train_loader_u + train_loader_u = None + if dataset.train_u: + sampler_type_ = cfg.DATALOADER.TRAIN_U.SAMPLER + batch_size_ = cfg.DATALOADER.TRAIN_U.BATCH_SIZE + n_domain_ = cfg.DATALOADER.TRAIN_U.N_DOMAIN + n_ins_ = cfg.DATALOADER.TRAIN_U.N_INS + + if cfg.DATALOADER.TRAIN_U.SAME_AS_X: + sampler_type_ = cfg.DATALOADER.TRAIN_X.SAMPLER + batch_size_ = cfg.DATALOADER.TRAIN_X.BATCH_SIZE + n_domain_ = cfg.DATALOADER.TRAIN_X.N_DOMAIN + n_ins_ = cfg.DATALOADER.TRAIN_X.N_INS + + train_loader_u = build_data_loader( + cfg, + sampler_type=sampler_type_, + data_source=dataset.train_u, + batch_size=batch_size_, + n_domain=n_domain_, + n_ins=n_ins_, + tfm=tfm_train, + is_train=True, + dataset_wrapper=dataset_wrapper + ) + + # Build val_loader + val_loader = None + if dataset.val: + val_loader = build_data_loader( + cfg, + sampler_type=cfg.DATALOADER.TEST.SAMPLER, + data_source=dataset.val, + batch_size=cfg.DATALOADER.TEST.BATCH_SIZE, + tfm=tfm_test, + is_train=False, + dataset_wrapper=dataset_wrapper + ) + + # Build test_loader + test_loader = build_data_loader( + cfg, + sampler_type=cfg.DATALOADER.TEST.SAMPLER, + data_source=dataset.test, + batch_size=cfg.DATALOADER.TEST.BATCH_SIZE, + tfm=tfm_test, + is_train=False, + dataset_wrapper=dataset_wrapper + ) + + # Attributes + self._num_classes = dataset.num_classes + self._num_source_domains = len(cfg.DATASET.SOURCE_DOMAINS) + self._lab2cname = dataset.lab2cname + + # Dataset and data-loaders + self.dataset = dataset + self.train_loader_x = train_loader_x + self.train_loader_u = train_loader_u + self.val_loader = val_loader + self.test_loader = test_loader + + if cfg.VERBOSE: + self.show_dataset_summary(cfg) + + @property + def num_classes(self): + return self._num_classes + + @property + def num_source_domains(self): + return self._num_source_domains + + @property + def lab2cname(self): + return self._lab2cname + + def show_dataset_summary(self, cfg): + dataset_name = cfg.DATASET.NAME + source_domains = cfg.DATASET.SOURCE_DOMAINS + target_domains = cfg.DATASET.TARGET_DOMAINS + + table = [] + table.append(["Dataset", dataset_name]) + if source_domains: + table.append(["Source", source_domains]) + if target_domains: + table.append(["Target", target_domains]) + table.append(["# classes", f"{self.num_classes:,}"]) + table.append(["# train_x", f"{len(self.dataset.train_x):,}"]) + if self.dataset.train_u: + table.append(["# train_u", f"{len(self.dataset.train_u):,}"]) + if self.dataset.val: + table.append(["# val", f"{len(self.dataset.val):,}"]) + table.append(["# test", f"{len(self.dataset.test):,}"]) + + print(tabulate(table)) + + +class DatasetWrapper(TorchDataset): + + def __init__(self, cfg, data_source, transform=None, is_train=False): + self.cfg = cfg + self.data_source = data_source + self.transform = transform # accept list (tuple) as input + self.is_train = is_train + # Augmenting an image K>1 times is only allowed during training + self.k_tfm = cfg.DATALOADER.K_TRANSFORMS if is_train else 1 + self.return_img0 = cfg.DATALOADER.RETURN_IMG0 + + if self.k_tfm > 1 and transform is None: + raise ValueError( + "Cannot augment the image {} times " + "because 
transform is None".format(self.k_tfm) + ) + + # Build transform that doesn't apply any data augmentation + interp_mode = INTERPOLATION_MODES[cfg.INPUT.INTERPOLATION] + to_tensor = [] + to_tensor += [T.Resize(cfg.INPUT.SIZE, interpolation=interp_mode)] + to_tensor += [T.ToTensor()] + if "normalize" in cfg.INPUT.TRANSFORMS: + normalize = T.Normalize( + mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD + ) + to_tensor += [normalize] + self.to_tensor = T.Compose(to_tensor) + + def __len__(self): + return len(self.data_source) + + def __getitem__(self, idx): + item = self.data_source[idx] + + output = { + "label": item.label, + "domain": item.domain, + "impath": item.impath, + "index": idx + } + + img0 = read_image(item.impath) + + if self.transform is not None: + if isinstance(self.transform, (list, tuple)): + for i, tfm in enumerate(self.transform): + img = self._transform_image(tfm, img0) + keyname = "img" + if (i + 1) > 1: + keyname += str(i + 1) + output[keyname] = img + else: + img = self._transform_image(self.transform, img0) + output["img"] = img + else: + output["img"] = img0 + + if self.return_img0: + output["img0"] = self.to_tensor(img0) # without any augmentation + + return output + + def _transform_image(self, tfm, img0): + img_list = [] + + for k in range(self.k_tfm): + img_list.append(tfm(img0)) + + img = img_list + if len(img) == 1: + img = img[0] + + return img diff --git a/Dassl.pytorch/dassl/data/datasets/__init__.py b/Dassl.pytorch/dassl/data/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4f58326fe091fe11d1a67c778b27c068b603f867 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/__init__.py @@ -0,0 +1,6 @@ +from .build import DATASET_REGISTRY, build_dataset # isort:skip +from .base_dataset import Datum, DatasetBase # isort:skip + +from .da import * +from .dg import * +from .ssl import * diff --git a/Dassl.pytorch/dassl/data/datasets/base_dataset.py b/Dassl.pytorch/dassl/data/datasets/base_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c7cafd040ae41f90e5d339bbb278db804ab89bdc --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/base_dataset.py @@ -0,0 +1,237 @@ +import os +import random +import os.path as osp +import tarfile +import zipfile +from collections import defaultdict +import gdown + +from dassl.utils import check_isfile + + +class Datum: + """Data instance which defines the basic attributes. + + Args: + impath (str): image path. + label (int): class label. + domain (int): domain label. + classname (str): class name. 
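Putting `DataManager` and `DatasetWrapper` together, a rough sketch of what one training batch contains; `cfg` is assumed to be a fully populated config (e.g. built as in the earlier sketch) with `DATASET.ROOT` pointing at real data, and the key names follow `DatasetWrapper.__getitem__` above:

```python
from dassl.data import DataManager

dm = DataManager(cfg)                  # builds the dataset, transforms and all loaders
batch = next(iter(dm.train_loader_x))

imgs = batch["img"]        # (B, C, H, W) tensor produced by tfm_train
labels = batch["label"]    # (B,) class labels
domains = batch["domain"]  # (B,) source-domain indices
paths = batch["impath"]    # list of image paths, handy for debugging
# batch["img0"] (the un-augmented image) exists only when DATALOADER.RETURN_IMG0 is True
```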
+ """ + + def __init__(self, impath="", label=0, domain=0, classname=""): + assert isinstance(impath, str) + assert check_isfile(impath) + + self._impath = impath + self._label = label + self._domain = domain + self._classname = classname + + @property + def impath(self): + return self._impath + + @property + def label(self): + return self._label + + @property + def domain(self): + return self._domain + + @property + def classname(self): + return self._classname + + +class DatasetBase: + """A unified dataset class for + 1) domain adaptation + 2) domain generalization + 3) semi-supervised learning + """ + + dataset_dir = "" # the directory where the dataset is stored + domains = [] # string names of all domains + + def __init__(self, train_x=None, train_u=None, val=None, test=None): + self._train_x = train_x # labeled training data + self._train_u = train_u # unlabeled training data (optional) + self._val = val # validation data (optional) + self._test = test # test data + self._num_classes = self.get_num_classes(train_x) + self._lab2cname, self._classnames = self.get_lab2cname(train_x) + + @property + def train_x(self): + return self._train_x + + @property + def train_u(self): + return self._train_u + + @property + def val(self): + return self._val + + @property + def test(self): + return self._test + + @property + def lab2cname(self): + return self._lab2cname + + @property + def classnames(self): + return self._classnames + + @property + def num_classes(self): + return self._num_classes + + @staticmethod + def get_num_classes(data_source): + """Count number of classes. + + Args: + data_source (list): a list of Datum objects. + """ + label_set = set() + for item in data_source: + label_set.add(item.label) + return max(label_set) + 1 + + @staticmethod + def get_lab2cname(data_source): + """Get a label-to-classname mapping (dict). + + Args: + data_source (list): a list of Datum objects. 
+ """ + container = set() + for item in data_source: + container.add((item.label, item.classname)) + mapping = {label: classname for label, classname in container} + labels = list(mapping.keys()) + labels.sort() + classnames = [mapping[label] for label in labels] + return mapping, classnames + + def check_input_domains(self, source_domains, target_domains): + assert len(source_domains) > 0, "source_domains (list) is empty" + assert len(target_domains) > 0, "target_domains (list) is empty" + self.is_input_domain_valid(source_domains) + self.is_input_domain_valid(target_domains) + + def is_input_domain_valid(self, input_domains): + for domain in input_domains: + if domain not in self.domains: + raise ValueError( + "Input domain must belong to {}, " + "but got [{}]".format(self.domains, domain) + ) + + def download_data(self, url, dst, from_gdrive=True): + if not osp.exists(osp.dirname(dst)): + os.makedirs(osp.dirname(dst)) + + if from_gdrive: + gdown.download(url, dst, quiet=False) + else: + raise NotImplementedError + + print("Extracting file ...") + + if dst.endswith(".zip"): + zip_ref = zipfile.ZipFile(dst, "r") + zip_ref.extractall(osp.dirname(dst)) + zip_ref.close() + + elif dst.endswith(".tar"): + tar = tarfile.open(dst, "r:") + tar.extractall(osp.dirname(dst)) + tar.close() + + elif dst.endswith(".tar.gz"): + tar = tarfile.open(dst, "r:gz") + tar.extractall(osp.dirname(dst)) + tar.close() + + else: + raise NotImplementedError + + print("File extracted to {}".format(osp.dirname(dst))) + + def generate_fewshot_dataset( + self, *data_sources, num_shots=-1, repeat=False + ): + """Generate a few-shot dataset (typically for the training set). + + This function is useful when one wants to evaluate a model + in a few-shot learning setting where each class only contains + a small number of images. + + Args: + data_sources: each individual is a list containing Datum objects. + num_shots (int): number of instances per class to sample. + repeat (bool): repeat images if needed (default: False). + """ + if num_shots < 1: + if len(data_sources) == 1: + return data_sources[0] + return data_sources + + print(f"Creating a {num_shots}-shot dataset") + + output = [] + + for data_source in data_sources: + tracker = self.split_dataset_by_label(data_source) + dataset = [] + + for label, items in tracker.items(): + if len(items) >= num_shots: + sampled_items = random.sample(items, num_shots) + else: + if repeat: + sampled_items = random.choices(items, k=num_shots) + else: + sampled_items = items + dataset.extend(sampled_items) + + output.append(dataset) + + if len(output) == 1: + return output[0] + + return output + + def split_dataset_by_label(self, data_source): + """Split a dataset, i.e. a list of Datum objects, + into class-specific groups stored in a dictionary. + + Args: + data_source (list): a list of Datum objects. + """ + output = defaultdict(list) + + for item in data_source: + output[item.label].append(item) + + return output + + def split_dataset_by_domain(self, data_source): + """Split a dataset, i.e. a list of Datum objects, + into domain-specific groups stored in a dictionary. + + Args: + data_source (list): a list of Datum objects. 
+ """ + output = defaultdict(list) + + for item in data_source: + output[item.domain].append(item) + + return output diff --git a/Dassl.pytorch/dassl/data/datasets/build.py b/Dassl.pytorch/dassl/data/datasets/build.py new file mode 100644 index 0000000000000000000000000000000000000000..9de62c61c6f8214a61e6bdafacfed939616a4d79 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/build.py @@ -0,0 +1,11 @@ +from dassl.utils import Registry, check_availability + +DATASET_REGISTRY = Registry("DATASET") + + +def build_dataset(cfg): + avai_datasets = DATASET_REGISTRY.registered_names() + check_availability(cfg.DATASET.NAME, avai_datasets) + if cfg.VERBOSE: + print("Loading dataset: {}".format(cfg.DATASET.NAME)) + return DATASET_REGISTRY.get(cfg.DATASET.NAME)(cfg) diff --git a/Dassl.pytorch/dassl/data/datasets/da/__init__.py b/Dassl.pytorch/dassl/data/datasets/da/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9c7b60f27dbdcd96d828d21772d8bd498f6924d7 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/da/__init__.py @@ -0,0 +1,7 @@ +from .digit5 import Digit5 +from .visda17 import VisDA17 +from .cifarstl import CIFARSTL +from .office31 import Office31 +from .domainnet import DomainNet +from .office_home import OfficeHome +from .mini_domainnet import miniDomainNet diff --git a/Dassl.pytorch/dassl/data/datasets/da/cifarstl.py b/Dassl.pytorch/dassl/data/datasets/da/cifarstl.py new file mode 100644 index 0000000000000000000000000000000000000000..ca27eb10e07b0131ef30a3953ea83bd8695632e2 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/da/cifarstl.py @@ -0,0 +1,68 @@ +import os.path as osp + +from dassl.utils import listdir_nohidden + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class CIFARSTL(DatasetBase): + """CIFAR-10 and STL-10. + + CIFAR-10: + - 60,000 32x32 colour images. + - 10 classes, with 6,000 images per class. + - 50,000 training images and 10,000 test images. + - URL: https://www.cs.toronto.edu/~kriz/cifar.html. + + STL-10: + - 10 classes: airplane, bird, car, cat, deer, dog, horse, + monkey, ship, truck. + - Images are 96x96 pixels, color. + - 500 training images (10 pre-defined folds), 800 test images + per class. + - URL: https://cs.stanford.edu/~acoates/stl10/. + + Reference: + - Krizhevsky. Learning Multiple Layers of Features + from Tiny Images. Tech report. + - Coates et al. An Analysis of Single Layer Networks in + Unsupervised Feature Learning. AISTATS 2011. 
+ """ + + dataset_dir = "cifar_stl" + domains = ["cifar", "stl"] + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train_x = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="train") + train_u = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="train") + test = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="test") + + super().__init__(train_x=train_x, train_u=train_u, test=test) + + def _read_data(self, input_domains, split="train"): + items = [] + + for domain, dname in enumerate(input_domains): + data_dir = osp.join(self.dataset_dir, dname, split) + class_names = listdir_nohidden(data_dir) + + for class_name in class_names: + class_dir = osp.join(data_dir, class_name) + imnames = listdir_nohidden(class_dir) + label = int(class_name.split("_")[0]) + + for imname in imnames: + impath = osp.join(class_dir, imname) + item = Datum(impath=impath, label=label, domain=domain) + items.append(item) + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/da/digit5.py b/Dassl.pytorch/dassl/data/datasets/da/digit5.py new file mode 100644 index 0000000000000000000000000000000000000000..4320005aab9839630d87ceb1a864543b40e159c8 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/da/digit5.py @@ -0,0 +1,124 @@ +import random +import os.path as osp + +from dassl.utils import listdir_nohidden + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + +# Folder names for train and test sets +MNIST = {"train": "train_images", "test": "test_images"} +MNIST_M = {"train": "train_images", "test": "test_images"} +SVHN = {"train": "train_images", "test": "test_images"} +SYN = {"train": "train_images", "test": "test_images"} +USPS = {"train": "train_images", "test": "test_images"} + + +def read_image_list(im_dir, n_max=None, n_repeat=None): + items = [] + + for imname in listdir_nohidden(im_dir): + imname_noext = osp.splitext(imname)[0] + label = int(imname_noext.split("_")[1]) + impath = osp.join(im_dir, imname) + items.append((impath, label)) + + if n_max is not None: + items = random.sample(items, n_max) + + if n_repeat is not None: + items *= n_repeat + + return items + + +def load_mnist(dataset_dir, split="train"): + data_dir = osp.join(dataset_dir, MNIST[split]) + n_max = 25000 if split == "train" else 9000 + return read_image_list(data_dir, n_max=n_max) + + +def load_mnist_m(dataset_dir, split="train"): + data_dir = osp.join(dataset_dir, MNIST_M[split]) + n_max = 25000 if split == "train" else 9000 + return read_image_list(data_dir, n_max=n_max) + + +def load_svhn(dataset_dir, split="train"): + data_dir = osp.join(dataset_dir, SVHN[split]) + n_max = 25000 if split == "train" else 9000 + return read_image_list(data_dir, n_max=n_max) + + +def load_syn(dataset_dir, split="train"): + data_dir = osp.join(dataset_dir, SYN[split]) + n_max = 25000 if split == "train" else 9000 + return read_image_list(data_dir, n_max=n_max) + + +def load_usps(dataset_dir, split="train"): + data_dir = osp.join(dataset_dir, USPS[split]) + n_repeat = 3 if split == "train" else None + return read_image_list(data_dir, n_repeat=n_repeat) + + +@DATASET_REGISTRY.register() +class Digit5(DatasetBase): + """Five digit datasets. + + It contains: + - MNIST: hand-written digits. + - MNIST-M: variant of MNIST with blended background. + - SVHN: street view house number. + - SYN: synthetic digits. 
+ - USPS: hand-written digits, slightly different from MNIST. + + For MNIST, MNIST-M, SVHN and SYN, we randomly sample 25,000 images from + the training set and 9,000 images from the test set. For USPS which has only + 9,298 images in total, we use the entire dataset but replicate its training + set for 3 times so as to match the training set size of other domains. + + Reference: + - Lecun et al. Gradient-based learning applied to document + recognition. IEEE 1998. + - Ganin et al. Domain-adversarial training of neural networks. + JMLR 2016. + - Netzer et al. Reading digits in natural images with unsupervised + feature learning. NIPS-W 2011. + """ + + dataset_dir = "digit5" + domains = ["mnist", "mnist_m", "svhn", "syn", "usps"] + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train_x = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="train") + train_u = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="train") + test = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="test") + + super().__init__(train_x=train_x, train_u=train_u, test=test) + + def _read_data(self, input_domains, split="train"): + items = [] + + for domain, dname in enumerate(input_domains): + func = "load_" + dname + domain_dir = osp.join(self.dataset_dir, dname) + items_d = eval(func)(domain_dir, split=split) + + for impath, label in items_d: + item = Datum( + impath=impath, + label=label, + domain=domain, + classname=str(label) + ) + items.append(item) + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/da/domainnet.py b/Dassl.pytorch/dassl/data/datasets/da/domainnet.py new file mode 100644 index 0000000000000000000000000000000000000000..8a703bf1bcf3a355f65c270678a35fe6778b5b3f --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/da/domainnet.py @@ -0,0 +1,69 @@ +import os.path as osp + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class DomainNet(DatasetBase): + """DomainNet. + + Statistics: + - 6 distinct domains: Clipart, Infograph, Painting, Quickdraw, + Real, Sketch. + - Around 0.6M images. + - 345 categories. + - URL: http://ai.bu.edu/M3SDA/. + + Special note: the t-shirt class (327) is missing in painting_train.txt. + + Reference: + - Peng et al. Moment Matching for Multi-Source Domain + Adaptation. ICCV 2019. 
+ """ + + dataset_dir = "domainnet" + domains = [ + "clipart", "infograph", "painting", "quickdraw", "real", "sketch" + ] + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + self.split_dir = osp.join(self.dataset_dir, "splits") + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train_x = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="train") + train_u = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="train") + val = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="test") + test = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="test") + + super().__init__(train_x=train_x, train_u=train_u, val=val, test=test) + + def _read_data(self, input_domains, split="train"): + items = [] + + for domain, dname in enumerate(input_domains): + filename = dname + "_" + split + ".txt" + split_file = osp.join(self.split_dir, filename) + + with open(split_file, "r") as f: + lines = f.readlines() + for line in lines: + line = line.strip() + impath, label = line.split(" ") + classname = impath.split("/")[1] + impath = osp.join(self.dataset_dir, impath) + label = int(label) + item = Datum( + impath=impath, + label=label, + domain=domain, + classname=classname + ) + items.append(item) + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/da/mini_domainnet.py b/Dassl.pytorch/dassl/data/datasets/da/mini_domainnet.py new file mode 100644 index 0000000000000000000000000000000000000000..4a708691444d658298c4c44be1209da258b6f62d --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/da/mini_domainnet.py @@ -0,0 +1,58 @@ +import os.path as osp + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class miniDomainNet(DatasetBase): + """A subset of DomainNet. + + Reference: + - Peng et al. Moment Matching for Multi-Source Domain + Adaptation. ICCV 2019. + - Zhou et al. Domain Adaptive Ensemble Learning. 
+ """ + + dataset_dir = "domainnet" + domains = ["clipart", "painting", "real", "sketch"] + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + self.split_dir = osp.join(self.dataset_dir, "splits_mini") + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train_x = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="train") + train_u = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="train") + test = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="test") + + super().__init__(train_x=train_x, train_u=train_u, test=test) + + def _read_data(self, input_domains, split="train"): + items = [] + + for domain, dname in enumerate(input_domains): + filename = dname + "_" + split + ".txt" + split_file = osp.join(self.split_dir, filename) + + with open(split_file, "r") as f: + lines = f.readlines() + for line in lines: + line = line.strip() + impath, label = line.split(" ") + classname = impath.split("/")[1] + impath = osp.join(self.dataset_dir, impath) + label = int(label) + item = Datum( + impath=impath, + label=label, + domain=domain, + classname=classname + ) + items.append(item) + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/da/office31.py b/Dassl.pytorch/dassl/data/datasets/da/office31.py new file mode 100644 index 0000000000000000000000000000000000000000..c2daca1d4bc5c7629da237da194941010047ee3a --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/da/office31.py @@ -0,0 +1,63 @@ +import os.path as osp + +from dassl.utils import listdir_nohidden + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class Office31(DatasetBase): + """Office-31. + + Statistics: + - 4,110 images. + - 31 classes related to office objects. + - 3 domains: Amazon, Webcam, Dslr. + - URL: https://people.eecs.berkeley.edu/~jhoffman/domainadapt/. + + Reference: + - Saenko et al. Adapting visual category models to + new domains. ECCV 2010. 
+ """ + + dataset_dir = "office31" + domains = ["amazon", "webcam", "dslr"] + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train_x = self._read_data(cfg.DATASET.SOURCE_DOMAINS) + train_u = self._read_data(cfg.DATASET.TARGET_DOMAINS) + test = self._read_data(cfg.DATASET.TARGET_DOMAINS) + + super().__init__(train_x=train_x, train_u=train_u, test=test) + + def _read_data(self, input_domains): + items = [] + + for domain, dname in enumerate(input_domains): + domain_dir = osp.join(self.dataset_dir, dname) + class_names = listdir_nohidden(domain_dir) + class_names.sort() + + for label, class_name in enumerate(class_names): + class_path = osp.join(domain_dir, class_name) + imnames = listdir_nohidden(class_path) + + for imname in imnames: + impath = osp.join(class_path, imname) + item = Datum( + impath=impath, + label=label, + domain=domain, + classname=class_name + ) + items.append(item) + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/da/office_home.py b/Dassl.pytorch/dassl/data/datasets/da/office_home.py new file mode 100644 index 0000000000000000000000000000000000000000..61996f2f557ee2cb324a387af1d86d57f3db269f --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/da/office_home.py @@ -0,0 +1,63 @@ +import os.path as osp + +from dassl.utils import listdir_nohidden + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class OfficeHome(DatasetBase): + """Office-Home. + + Statistics: + - Around 15,500 images. + - 65 classes related to office and home objects. + - 4 domains: Art, Clipart, Product, Real World. + - URL: http://hemanthdv.org/OfficeHome-Dataset/. + + Reference: + - Venkateswara et al. Deep Hashing Network for Unsupervised + Domain Adaptation. CVPR 2017. 
+ """ + + dataset_dir = "office_home" + domains = ["art", "clipart", "product", "real_world"] + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train_x = self._read_data(cfg.DATASET.SOURCE_DOMAINS) + train_u = self._read_data(cfg.DATASET.TARGET_DOMAINS) + test = self._read_data(cfg.DATASET.TARGET_DOMAINS) + + super().__init__(train_x=train_x, train_u=train_u, test=test) + + def _read_data(self, input_domains): + items = [] + + for domain, dname in enumerate(input_domains): + domain_dir = osp.join(self.dataset_dir, dname) + class_names = listdir_nohidden(domain_dir) + class_names.sort() + + for label, class_name in enumerate(class_names): + class_path = osp.join(domain_dir, class_name) + imnames = listdir_nohidden(class_path) + + for imname in imnames: + impath = osp.join(class_path, imname) + item = Datum( + impath=impath, + label=label, + domain=domain, + classname=class_name.lower(), + ) + items.append(item) + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/da/visda17.py b/Dassl.pytorch/dassl/data/datasets/da/visda17.py new file mode 100644 index 0000000000000000000000000000000000000000..48c1045e67e59624c6d3a17cb1bf6eb85079b1da --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/da/visda17.py @@ -0,0 +1,61 @@ +import os.path as osp + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class VisDA17(DatasetBase): + """VisDA17. + + Focusing on simulation-to-reality domain shift. + + URL: http://ai.bu.edu/visda-2017/. + + Reference: + - Peng et al. VisDA: The Visual Domain Adaptation + Challenge. ArXiv 2017. 
+ """ + + dataset_dir = "visda17" + domains = ["synthetic", "real"] + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train_x = self._read_data("synthetic") + train_u = self._read_data("real") + test = self._read_data("real") + + super().__init__(train_x=train_x, train_u=train_u, test=test) + + def _read_data(self, dname): + filedir = "train" if dname == "synthetic" else "validation" + image_list = osp.join(self.dataset_dir, filedir, "image_list.txt") + items = [] + # There is only one source domain + domain = 0 + + with open(image_list, "r") as f: + lines = f.readlines() + + for line in lines: + line = line.strip() + impath, label = line.split(" ") + classname = impath.split("/")[0] + impath = osp.join(self.dataset_dir, filedir, impath) + label = int(label) + item = Datum( + impath=impath, + label=label, + domain=domain, + classname=classname + ) + items.append(item) + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/dg/__init__.py b/Dassl.pytorch/dassl/data/datasets/dg/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b94c35cd9f8db817f343d398f3ada76fe0828888 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/dg/__init__.py @@ -0,0 +1,7 @@ +from .pacs import PACS +from .vlcs import VLCS +from .wilds import * +from .cifar_c import CIFAR10C, CIFAR100C +from .digits_dg import DigitsDG +from .digit_single import DigitSingle +from .office_home_dg import OfficeHomeDG diff --git a/Dassl.pytorch/dassl/data/datasets/dg/cifar_c.py b/Dassl.pytorch/dassl/data/datasets/dg/cifar_c.py new file mode 100644 index 0000000000000000000000000000000000000000..7d1e4f38081f99a4005ebe295b8722afbc1197e5 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/dg/cifar_c.py @@ -0,0 +1,123 @@ +import os.path as osp + +from dassl.utils import listdir_nohidden + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + +AVAI_C_TYPES = [ + "brightness", + "contrast", + "defocus_blur", + "elastic_transform", + "fog", + "frost", + "gaussian_blur", + "gaussian_noise", + "glass_blur", + "impulse_noise", + "jpeg_compression", + "motion_blur", + "pixelate", + "saturate", + "shot_noise", + "snow", + "spatter", + "speckle_noise", + "zoom_blur", +] + + +@DATASET_REGISTRY.register() +class CIFAR10C(DatasetBase): + """CIFAR-10 -> CIFAR-10-C. + + Dataset link: https://zenodo.org/record/2535967#.YFwtV2Qzb0o + + Statistics: + - 2 domains: the normal CIFAR-10 vs. a corrupted CIFAR-10 + - 10 categories + + Reference: + - Hendrycks et al. Benchmarking neural network robustness + to common corruptions and perturbations. ICLR 2019. 
+ """ + + dataset_dir = "" + domains = ["cifar10", "cifar10_c"] + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = root + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + source_domain = cfg.DATASET.SOURCE_DOMAINS[0] + target_domain = cfg.DATASET.TARGET_DOMAINS[0] + assert source_domain == self.domains[0] + assert target_domain == self.domains[1] + + c_type = cfg.DATASET.CIFAR_C_TYPE + c_level = cfg.DATASET.CIFAR_C_LEVEL + + if not c_type: + raise ValueError( + "Please specify DATASET.CIFAR_C_TYPE in the config file" + ) + + assert ( + c_type in AVAI_C_TYPES + ), f'C_TYPE is expected to belong to {AVAI_C_TYPES}, but got "{c_type}"' + assert 1 <= c_level <= 5 + + train_dir = osp.join(self.dataset_dir, source_domain, "train") + test_dir = osp.join( + self.dataset_dir, target_domain, c_type, str(c_level) + ) + + if not osp.exists(test_dir): + raise ValueError + + train = self._read_data(train_dir) + test = self._read_data(test_dir) + + super().__init__(train_x=train, test=test) + + def _read_data(self, data_dir): + class_names = listdir_nohidden(data_dir) + class_names.sort() + items = [] + + for label, class_name in enumerate(class_names): + class_dir = osp.join(data_dir, class_name) + imnames = listdir_nohidden(class_dir) + + for imname in imnames: + impath = osp.join(class_dir, imname) + item = Datum(impath=impath, label=label, domain=0) + items.append(item) + + return items + + +@DATASET_REGISTRY.register() +class CIFAR100C(CIFAR10C): + """CIFAR-100 -> CIFAR-100-C. + + Dataset link: https://zenodo.org/record/3555552#.YFxpQmQzb0o + + Statistics: + - 2 domains: the normal CIFAR-100 vs. a corrupted CIFAR-100 + - 10 categories + + Reference: + - Hendrycks et al. Benchmarking neural network robustness + to common corruptions and perturbations. ICLR 2019. + """ + + dataset_dir = "" + domains = ["cifar100", "cifar100_c"] + + def __init__(self, cfg): + super().__init__(cfg) diff --git a/Dassl.pytorch/dassl/data/datasets/dg/digit_single.py b/Dassl.pytorch/dassl/data/datasets/dg/digit_single.py new file mode 100644 index 0000000000000000000000000000000000000000..5490e92f02a912a60ceac21afbcb2388f1bcaaa3 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/dg/digit_single.py @@ -0,0 +1,124 @@ +import os.path as osp + +from dassl.utils import listdir_nohidden + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + +# Folder names for train and test sets +MNIST = {"train": "train_images", "test": "test_images"} +MNIST_M = {"train": "train_images", "test": "test_images"} +SVHN = {"train": "train_images", "test": "test_images"} +SYN = {"train": "train_images", "test": "test_images"} +USPS = {"train": "train_images", "test": "test_images"} + + +def read_image_list(im_dir, n_max=None, n_repeat=None): + items = [] + + for imname in listdir_nohidden(im_dir): + imname_noext = osp.splitext(imname)[0] + label = int(imname_noext.split("_")[1]) + impath = osp.join(im_dir, imname) + items.append((impath, label)) + + if n_max is not None: + # Note that the sampling process is NOT random, + # which follows that in Volpi et al. NIPS'18. 
+ items = items[:n_max] + + if n_repeat is not None: + items *= n_repeat + + return items + + +def load_mnist(dataset_dir, split="train"): + data_dir = osp.join(dataset_dir, MNIST[split]) + n_max = 10000 if split == "train" else None + return read_image_list(data_dir, n_max=n_max) + + +def load_mnist_m(dataset_dir, split="train"): + data_dir = osp.join(dataset_dir, MNIST_M[split]) + n_max = 10000 if split == "train" else None + return read_image_list(data_dir, n_max=n_max) + + +def load_svhn(dataset_dir, split="train"): + data_dir = osp.join(dataset_dir, SVHN[split]) + n_max = 10000 if split == "train" else None + return read_image_list(data_dir, n_max=n_max) + + +def load_syn(dataset_dir, split="train"): + data_dir = osp.join(dataset_dir, SYN[split]) + n_max = 10000 if split == "train" else None + return read_image_list(data_dir, n_max=n_max) + + +def load_usps(dataset_dir, split="train"): + data_dir = osp.join(dataset_dir, USPS[split]) + return read_image_list(data_dir) + + +@DATASET_REGISTRY.register() +class DigitSingle(DatasetBase): + """Digit recognition datasets for single-source domain generalization. + + There are five digit datasets: + - MNIST: hand-written digits. + - MNIST-M: variant of MNIST with blended background. + - SVHN: street view house number. + - SYN: synthetic digits. + - USPS: hand-written digits, slightly different from MNIST. + + Protocol: + Volpi et al. train a model using 10,000 images from MNIST and + evaluate the model on the test split of the other four datasets. However, + the code does not restrict you to only use MNIST as the source dataset. + Instead, you can use any dataset as the source. But note that only 10,000 + images will be sampled from the source dataset for training. + + Reference: + - Lecun et al. Gradient-based learning applied to document + recognition. IEEE 1998. + - Ganin et al. Domain-adversarial training of neural networks. + JMLR 2016. + - Netzer et al. Reading digits in natural images with unsupervised + feature learning. NIPS-W 2011. + - Volpi et al. Generalizing to Unseen Domains via Adversarial Data + Augmentation. NIPS 2018. 
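In config terms, the protocol described above boils down to one source domain and one held-out target at a time; a small illustrative sketch using the domain names of this dataset:

```python
from dassl.config import get_cfg_default

cfg = get_cfg_default()
cfg.DATASET.NAME = "DigitSingle"
cfg.DATASET.SOURCE_DOMAINS = ["mnist"]  # only 10,000 training images are kept (see load_mnist)
cfg.DATASET.TARGET_DOMAINS = ["svhn"]   # evaluate on one unseen digit domain at a time
```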
+ """ + + # Reuse the digit-5 folder instead of creating a new folder + dataset_dir = "digit5" + domains = ["mnist", "mnist_m", "svhn", "syn", "usps"] + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="train") + val = self._read_data(cfg.DATASET.SOURCE_DOMAINS, split="test") + test = self._read_data(cfg.DATASET.TARGET_DOMAINS, split="test") + + super().__init__(train_x=train, val=val, test=test) + + def _read_data(self, input_domains, split="train"): + items = [] + + for domain, dname in enumerate(input_domains): + func = "load_" + dname + domain_dir = osp.join(self.dataset_dir, dname) + items_d = eval(func)(domain_dir, split=split) + + for impath, label in items_d: + item = Datum(impath=impath, label=label, domain=domain) + items.append(item) + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/dg/digits_dg.py b/Dassl.pytorch/dassl/data/datasets/dg/digits_dg.py new file mode 100644 index 0000000000000000000000000000000000000000..43ccd6f49aa4313f5a4aa6f078682abf9ed7b045 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/dg/digits_dg.py @@ -0,0 +1,97 @@ +import glob +import os.path as osp + +from dassl.utils import listdir_nohidden + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class DigitsDG(DatasetBase): + """Digits-DG. + + It contains 4 digit datasets: + - MNIST: hand-written digits. + - MNIST-M: variant of MNIST with blended background. + - SVHN: street view house number. + - SYN: synthetic digits. + + Reference: + - Lecun et al. Gradient-based learning applied to document + recognition. IEEE 1998. + - Ganin et al. Domain-adversarial training of neural networks. + JMLR 2016. + - Netzer et al. Reading digits in natural images with unsupervised + feature learning. NIPS-W 2011. + - Zhou et al. Deep Domain-Adversarial Image Generation for Domain + Generalisation. AAAI 2020. 
+ """ + + dataset_dir = "digits_dg" + domains = ["mnist", "mnist_m", "svhn", "syn"] + data_url = "https://drive.google.com/uc?id=15V7EsHfCcfbKgsDmzQKj_DfXt_XYp_P7" + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + + if not osp.exists(self.dataset_dir): + dst = osp.join(root, "digits_dg.zip") + self.download_data(self.data_url, dst, from_gdrive=True) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train = self.read_data( + self.dataset_dir, cfg.DATASET.SOURCE_DOMAINS, "train" + ) + val = self.read_data( + self.dataset_dir, cfg.DATASET.SOURCE_DOMAINS, "val" + ) + test = self.read_data( + self.dataset_dir, cfg.DATASET.TARGET_DOMAINS, "all" + ) + + super().__init__(train_x=train, val=val, test=test) + + @staticmethod + def read_data(dataset_dir, input_domains, split): + + def _load_data_from_directory(directory): + folders = listdir_nohidden(directory) + folders.sort() + items_ = [] + + for label, folder in enumerate(folders): + impaths = glob.glob(osp.join(directory, folder, "*.jpg")) + + for impath in impaths: + items_.append((impath, label)) + + return items_ + + items = [] + + for domain, dname in enumerate(input_domains): + if split == "all": + train_dir = osp.join(dataset_dir, dname, "train") + impath_label_list = _load_data_from_directory(train_dir) + val_dir = osp.join(dataset_dir, dname, "val") + impath_label_list += _load_data_from_directory(val_dir) + else: + split_dir = osp.join(dataset_dir, dname, split) + impath_label_list = _load_data_from_directory(split_dir) + + for impath, label in impath_label_list: + class_name = impath.split("/")[-2].lower() + item = Datum( + impath=impath, + label=label, + domain=domain, + classname=class_name + ) + items.append(item) + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/dg/office_home_dg.py b/Dassl.pytorch/dassl/data/datasets/dg/office_home_dg.py new file mode 100644 index 0000000000000000000000000000000000000000..ef08754b70104d347504e5640f1f7c561d179b0e --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/dg/office_home_dg.py @@ -0,0 +1,49 @@ +import os.path as osp + +from ..build import DATASET_REGISTRY +from .digits_dg import DigitsDG +from ..base_dataset import DatasetBase + + +@DATASET_REGISTRY.register() +class OfficeHomeDG(DatasetBase): + """Office-Home. + + Statistics: + - Around 15,500 images. + - 65 classes related to office and home objects. + - 4 domains: Art, Clipart, Product, Real World. + - URL: http://hemanthdv.org/OfficeHome-Dataset/. + + Reference: + - Venkateswara et al. Deep Hashing Network for Unsupervised + Domain Adaptation. CVPR 2017. 
+ """ + + dataset_dir = "office_home_dg" + domains = ["art", "clipart", "product", "real_world"] + data_url = "https://drive.google.com/uc?id=1gkbf_KaxoBws-GWT3XIPZ7BnkqbAxIFa" + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + + if not osp.exists(self.dataset_dir): + dst = osp.join(root, "office_home_dg.zip") + self.download_data(self.data_url, dst, from_gdrive=True) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train = DigitsDG.read_data( + self.dataset_dir, cfg.DATASET.SOURCE_DOMAINS, "train" + ) + val = DigitsDG.read_data( + self.dataset_dir, cfg.DATASET.SOURCE_DOMAINS, "val" + ) + test = DigitsDG.read_data( + self.dataset_dir, cfg.DATASET.TARGET_DOMAINS, "all" + ) + + super().__init__(train_x=train, val=val, test=test) diff --git a/Dassl.pytorch/dassl/data/datasets/dg/pacs.py b/Dassl.pytorch/dassl/data/datasets/dg/pacs.py new file mode 100644 index 0000000000000000000000000000000000000000..e0159d490d8db7afdb354a1fd19e543e6eaf97c3 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/dg/pacs.py @@ -0,0 +1,94 @@ +import os.path as osp + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class PACS(DatasetBase): + """PACS. + + Statistics: + - 4 domains: Photo (1,670), Art (2,048), Cartoon + (2,344), Sketch (3,929). + - 7 categories: dog, elephant, giraffe, guitar, horse, + house and person. + + Reference: + - Li et al. Deeper, broader and artier domain generalization. + ICCV 2017. + """ + + dataset_dir = "pacs" + domains = ["art_painting", "cartoon", "photo", "sketch"] + data_url = "https://drive.google.com/uc?id=1m4X4fROCCXMO0lRLrr6Zz9Vb3974NWhE" + # the following images contain errors and should be ignored + _error_paths = ["sketch/dog/n02103406_4068-1.png"] + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + self.image_dir = osp.join(self.dataset_dir, "images") + self.split_dir = osp.join(self.dataset_dir, "splits") + + if not osp.exists(self.dataset_dir): + dst = osp.join(root, "pacs.zip") + self.download_data(self.data_url, dst, from_gdrive=True) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train = self._read_data(cfg.DATASET.SOURCE_DOMAINS, "train") + val = self._read_data(cfg.DATASET.SOURCE_DOMAINS, "crossval") + test = self._read_data(cfg.DATASET.TARGET_DOMAINS, "all") + + super().__init__(train_x=train, val=val, test=test) + + def _read_data(self, input_domains, split): + items = [] + + for domain, dname in enumerate(input_domains): + if split == "all": + file_train = osp.join( + self.split_dir, dname + "_train_kfold.txt" + ) + impath_label_list = self._read_split_pacs(file_train) + file_val = osp.join( + self.split_dir, dname + "_crossval_kfold.txt" + ) + impath_label_list += self._read_split_pacs(file_val) + else: + file = osp.join( + self.split_dir, dname + "_" + split + "_kfold.txt" + ) + impath_label_list = self._read_split_pacs(file) + + for impath, label in impath_label_list: + classname = impath.split("/")[-2] + item = Datum( + impath=impath, + label=label, + domain=domain, + classname=classname + ) + items.append(item) + + return items + + def _read_split_pacs(self, split_file): + items = [] + + with open(split_file, "r") as f: + lines = f.readlines() + + for line in lines: + line = line.strip() + impath, label = line.split(" ") + 
if impath in self._error_paths: + continue + impath = osp.join(self.image_dir, impath) + label = int(label) - 1 + items.append((impath, label)) + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/dg/vlcs.py b/Dassl.pytorch/dassl/data/datasets/dg/vlcs.py new file mode 100644 index 0000000000000000000000000000000000000000..77218e2fc5f03641a291bf3d39ea4c3536883a4f --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/dg/vlcs.py @@ -0,0 +1,60 @@ +import glob +import os.path as osp + +from dassl.utils import listdir_nohidden + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class VLCS(DatasetBase): + """VLCS. + + Statistics: + - 4 domains: CALTECH, LABELME, PASCAL, SUN + - 5 categories: bird, car, chair, dog, and person. + + Reference: + - Torralba and Efros. Unbiased look at dataset bias. CVPR 2011. + """ + + dataset_dir = "VLCS" + domains = ["caltech", "labelme", "pascal", "sun"] + data_url = "https://drive.google.com/uc?id=1r0WL5DDqKfSPp9E3tRENwHaXNs1olLZd" + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + + if not osp.exists(self.dataset_dir): + dst = osp.join(root, "vlcs.zip") + self.download_data(self.data_url, dst, from_gdrive=True) + + self.check_input_domains( + cfg.DATASET.SOURCE_DOMAINS, cfg.DATASET.TARGET_DOMAINS + ) + + train = self._read_data(cfg.DATASET.SOURCE_DOMAINS, "train") + val = self._read_data(cfg.DATASET.SOURCE_DOMAINS, "crossval") + test = self._read_data(cfg.DATASET.TARGET_DOMAINS, "test") + + super().__init__(train_x=train, val=val, test=test) + + def _read_data(self, input_domains, split): + items = [] + + for domain, dname in enumerate(input_domains): + dname = dname.upper() + path = osp.join(self.dataset_dir, dname, split) + folders = listdir_nohidden(path) + folders.sort() + + for label, folder in enumerate(folders): + impaths = glob.glob(osp.join(path, folder, "*.jpg")) + + for impath in impaths: + item = Datum(impath=impath, label=label, domain=domain) + items.append(item) + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/dg/wilds/__init__.py b/Dassl.pytorch/dassl/data/datasets/dg/wilds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2898f7ccef7122055b2fd7f1553a1e8630875ba0 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/dg/wilds/__init__.py @@ -0,0 +1,3 @@ +from .fmow import FMoW +from .iwildcam import IWildCam +from .camelyon17 import Camelyon17 diff --git a/Dassl.pytorch/dassl/data/datasets/dg/wilds/camelyon17.py b/Dassl.pytorch/dassl/data/datasets/dg/wilds/camelyon17.py new file mode 100644 index 0000000000000000000000000000000000000000..fade5ebce820fc3a15be79d21b38cc40ed9d9e2d --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/dg/wilds/camelyon17.py @@ -0,0 +1,24 @@ +from dassl.data.datasets import DATASET_REGISTRY + +from .wilds_base import WILDSBase + + +@DATASET_REGISTRY.register() +class Camelyon17(WILDSBase): + """Tumor tissue recognition. + + 2 classes (whether a given region of tissue contains tumor tissue). + + Reference: + - Bandi et al. "From detection of individual metastases to classification of lymph + node status at the patient level: the CAMELYON17 challenge." TMI 2021. + - Koh et al. "Wilds: A benchmark of in-the-wild distribution shifts." ICML 2021. 
+ """ + + dataset_dir = "camelyon17_v1.0" + + def __init__(self, cfg): + super().__init__(cfg) + + def load_classnames(self): + return {0: "healthy tissue", 1: "tumor tissue"} diff --git a/Dassl.pytorch/dassl/data/datasets/dg/wilds/fmow.py b/Dassl.pytorch/dassl/data/datasets/dg/wilds/fmow.py new file mode 100644 index 0000000000000000000000000000000000000000..d7398e0533ebbb234bd3c294883a366930545bf3 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/dg/wilds/fmow.py @@ -0,0 +1,57 @@ +import os.path as osp + +from dassl.data.datasets import DATASET_REGISTRY + +from .wilds_base import WILDSBase + +CATEGORIES = [ + "airport", "airport_hangar", "airport_terminal", "amusement_park", + "aquaculture", "archaeological_site", "barn", "border_checkpoint", + "burial_site", "car_dealership", "construction_site", "crop_field", "dam", + "debris_or_rubble", "educational_institution", "electric_substation", + "factory_or_powerplant", "fire_station", "flooded_road", "fountain", + "gas_station", "golf_course", "ground_transportation_station", "helipad", + "hospital", "impoverished_settlement", "interchange", "lake_or_pond", + "lighthouse", "military_facility", "multi-unit_residential", + "nuclear_powerplant", "office_building", "oil_or_gas_facility", "park", + "parking_lot_or_garage", "place_of_worship", "police_station", "port", + "prison", "race_track", "railway_bridge", "recreational_facility", + "road_bridge", "runway", "shipyard", "shopping_mall", + "single-unit_residential", "smokestack", "solar_farm", "space_facility", + "stadium", "storage_tank", "surface_mine", "swimming_pool", "toll_booth", + "tower", "tunnel_opening", "waste_disposal", "water_treatment_facility", + "wind_farm", "zoo" +] + + +@DATASET_REGISTRY.register() +class FMoW(WILDSBase): + """Satellite imagery classification. + + 62 classes (building or land use categories). + + Reference: + - Christie et al. "Functional Map of the World." CVPR 2018. + - Koh et al. "Wilds: A benchmark of in-the-wild distribution shifts." ICML 2021. + """ + + dataset_dir = "fmow_v1.1" + + def __init__(self, cfg): + super().__init__(cfg) + + def get_image_path(self, dataset, idx): + idx = dataset.full_idxs[idx] + image_name = f"rgb_img_{idx}.png" + image_path = osp.join(self.dataset_dir, "images", image_name) + return image_path + + def get_domain(self, dataset, idx): + # number of regions: 5 or 6 + # number of years: 16 + region_id = int(dataset.metadata_array[idx][0]) + year_id = int(dataset.metadata_array[idx][1]) + return region_id*16 + year_id + + def load_classnames(self): + return {i: cat for i, cat in enumerate(CATEGORIES)} diff --git a/Dassl.pytorch/dassl/data/datasets/dg/wilds/iwildcam.py b/Dassl.pytorch/dassl/data/datasets/dg/wilds/iwildcam.py new file mode 100644 index 0000000000000000000000000000000000000000..3d1f016c2a36feec52f968ffab376b81d0cb05e7 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/dg/wilds/iwildcam.py @@ -0,0 +1,32 @@ +import os.path as osp +import pandas as pd + +from dassl.data.datasets import DATASET_REGISTRY + +from .wilds_base import WILDSBase + + +@DATASET_REGISTRY.register() +class IWildCam(WILDSBase): + """Animal species recognition. + + 182 classes (species). + + Reference: + - Beery et al. "The iwildcam 2021 competition dataset." arXiv 2021. + - Koh et al. "Wilds: A benchmark of in-the-wild distribution shifts." ICML 2021. 
+ """ + + dataset_dir = "iwildcam_v2.0" + + def __init__(self, cfg): + super().__init__(cfg) + + def get_image_path(self, dataset, idx): + image_name = dataset._input_array[idx] + image_path = osp.join(self.dataset_dir, "train", image_name) + return image_path + + def load_classnames(self): + df = pd.read_csv(osp.join(self.dataset_dir, "categories.csv")) + return dict(df["name"]) diff --git a/Dassl.pytorch/dassl/data/datasets/dg/wilds/wilds_base.py b/Dassl.pytorch/dassl/data/datasets/dg/wilds/wilds_base.py new file mode 100644 index 0000000000000000000000000000000000000000..33232e1ef869ce5235b230b77c2d6b41d7192d25 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/dg/wilds/wilds_base.py @@ -0,0 +1,110 @@ +import logging # isort:skip +logging.disable(logging.WARNING) # isort:skip + +import pickle +import logging +import os.path as osp +from wilds import get_dataset as wilds_get_dataset + +from dassl.data.datasets import Datum, DatasetBase + + +class WILDSBase(DatasetBase): + + dataset_dir = "" + relabel_domain = True + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + name = self.dataset_dir.split("_")[0] + self.dataset_dir = osp.join(root, self.dataset_dir) + self.preloaded = osp.join(self.dataset_dir, "zhou_preloaded.pkl") + + self.label_to_name = self.load_classnames() + assert isinstance(self.label_to_name, dict) + + if osp.exists(self.preloaded): + with open(self.preloaded, "rb") as file: + dataset = pickle.load(file) + train = dataset["train"] + val = dataset["val"] + test = dataset["test"] + else: + dataset = wilds_get_dataset( + dataset=name, root_dir=root, download=True + ) + subset_train = dataset.get_subset("train") + subset_val = dataset.get_subset("val") + subset_test = dataset.get_subset("test") + + train = self.read_data(subset_train) + val = self.read_data(subset_val) + test = self.read_data(subset_test) + + # Save time for data loading next time + preloaded = {"train": train, "val": val, "test": test} + with open(self.preloaded, "wb") as file: + pickle.dump(preloaded, file, protocol=pickle.HIGHEST_PROTOCOL) + + # Few-shot learning + k = cfg.DATASET.NUM_SHOTS + if k > 0: + groups = self.split_dataset_by_domain(train) + groups = list(groups.values()) + groups = self.generate_fewshot_dataset(*groups, num_shots=k) + train = [] + for group in groups: + train.extend(group) + + super().__init__(train_x=train, val=val, test=test) + + def load_classnames(self): + raise NotImplementedError + + def get_image_path(self, dataset, idx): + image_name = dataset._input_array[idx] + image_path = osp.join(self.dataset_dir, image_name) + return image_path + + def get_label(self, dataset, idx): + return int(dataset.y_array[idx]) + + def get_domain(self, dataset, idx): + return int(dataset.metadata_array[idx][0]) + + def read_data(self, subset): + items = [] + indices = subset.indices + dataset = subset.dataset + + for idx in indices: + image_path = self.get_image_path(dataset, idx) + label = self.get_label(dataset, idx) + domain = self.get_domain(dataset, idx) + classname = self.label_to_name[label] + item = Datum( + impath=image_path, + label=label, + domain=domain, + classname=classname + ) + items.append(item) + + if self.relabel_domain: + domains = set([item.domain for item in items]) + mapping = {domain: i for i, domain in enumerate(domains)} + + items_new = [] + + for item in items: + item_new = Datum( + impath=item.impath, + label=item.label, + domain=mapping[item.domain], + classname=item.classname + ) + items_new.append(item_new) + + return 
items_new + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/ssl/__init__.py b/Dassl.pytorch/dassl/data/datasets/ssl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a6607dccd9e2dfe4132ebe31cd8613dba0a03848 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/ssl/__init__.py @@ -0,0 +1,3 @@ +from .svhn import SVHN +from .cifar import CIFAR10, CIFAR100 +from .stl10 import STL10 diff --git a/Dassl.pytorch/dassl/data/datasets/ssl/cifar.py b/Dassl.pytorch/dassl/data/datasets/ssl/cifar.py new file mode 100644 index 0000000000000000000000000000000000000000..55845279ca23f532f07d4f28ce062ff3dc927f33 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/ssl/cifar.py @@ -0,0 +1,108 @@ +import math +import random +import os.path as osp + +from dassl.utils import listdir_nohidden + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class CIFAR10(DatasetBase): + """CIFAR10 for SSL. + + Reference: + - Krizhevsky. Learning Multiple Layers of Features + from Tiny Images. Tech report. + """ + + dataset_dir = "cifar10" + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + train_dir = osp.join(self.dataset_dir, "train") + test_dir = osp.join(self.dataset_dir, "test") + + assert cfg.DATASET.NUM_LABELED > 0 + + train_x, train_u, val = self._read_data_train( + train_dir, cfg.DATASET.NUM_LABELED, cfg.DATASET.VAL_PERCENT + ) + test = self._read_data_test(test_dir) + + if cfg.DATASET.ALL_AS_UNLABELED: + train_u = train_u + train_x + + if len(val) == 0: + val = None + + super().__init__(train_x=train_x, train_u=train_u, val=val, test=test) + + def _read_data_train(self, data_dir, num_labeled, val_percent): + class_names = listdir_nohidden(data_dir) + class_names.sort() + num_labeled_per_class = num_labeled / len(class_names) + items_x, items_u, items_v = [], [], [] + + for label, class_name in enumerate(class_names): + class_dir = osp.join(data_dir, class_name) + imnames = listdir_nohidden(class_dir) + + # Split into train and val following Oliver et al. 2018 + # Set cfg.DATASET.VAL_PERCENT to 0 to not use val data + num_val = math.floor(len(imnames) * val_percent) + imnames_train = imnames[num_val:] + imnames_val = imnames[:num_val] + + # Note we do shuffle after split + random.shuffle(imnames_train) + + for i, imname in enumerate(imnames_train): + impath = osp.join(class_dir, imname) + item = Datum(impath=impath, label=label) + + if (i + 1) <= num_labeled_per_class: + items_x.append(item) + + else: + items_u.append(item) + + for imname in imnames_val: + impath = osp.join(class_dir, imname) + item = Datum(impath=impath, label=label) + items_v.append(item) + + return items_x, items_u, items_v + + def _read_data_test(self, data_dir): + class_names = listdir_nohidden(data_dir) + class_names.sort() + items = [] + + for label, class_name in enumerate(class_names): + class_dir = osp.join(data_dir, class_name) + imnames = listdir_nohidden(class_dir) + + for imname in imnames: + impath = osp.join(class_dir, imname) + item = Datum(impath=impath, label=label) + items.append(item) + + return items + + +@DATASET_REGISTRY.register() +class CIFAR100(CIFAR10): + """CIFAR100 for SSL. + + Reference: + - Krizhevsky. Learning Multiple Layers of Features + from Tiny Images. Tech report. 
+ """ + + dataset_dir = "cifar100" + + def __init__(self, cfg): + super().__init__(cfg) diff --git a/Dassl.pytorch/dassl/data/datasets/ssl/stl10.py b/Dassl.pytorch/dassl/data/datasets/ssl/stl10.py new file mode 100644 index 0000000000000000000000000000000000000000..6a1f9f2d08557c715cbad7340206e27c1c490c13 --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/ssl/stl10.py @@ -0,0 +1,87 @@ +import numpy as np +import os.path as osp + +from dassl.utils import listdir_nohidden + +from ..build import DATASET_REGISTRY +from ..base_dataset import Datum, DatasetBase + + +@DATASET_REGISTRY.register() +class STL10(DatasetBase): + """STL-10 dataset. + + Description: + - 10 classes: airplane, bird, car, cat, deer, dog, horse, + monkey, ship, truck. + - Images are 96x96 pixels, color. + - 500 training images per class, 800 test images per class. + - 100,000 unlabeled images for unsupervised learning. + + Reference: + - Coates et al. An Analysis of Single Layer Networks in + Unsupervised Feature Learning. AISTATS 2011. + """ + + dataset_dir = "stl10" + + def __init__(self, cfg): + root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = osp.join(root, self.dataset_dir) + train_dir = osp.join(self.dataset_dir, "train") + test_dir = osp.join(self.dataset_dir, "test") + unlabeled_dir = osp.join(self.dataset_dir, "unlabeled") + fold_file = osp.join( + self.dataset_dir, "stl10_binary", "fold_indices.txt" + ) + + # Only use the first five splits + assert 0 <= cfg.DATASET.STL10_FOLD <= 4 + + train_x = self._read_data_train( + train_dir, cfg.DATASET.STL10_FOLD, fold_file + ) + train_u = self._read_data_all(unlabeled_dir) + test = self._read_data_all(test_dir) + + if cfg.DATASET.ALL_AS_UNLABELED: + train_u = train_u + train_x + + super().__init__(train_x=train_x, train_u=train_u, test=test) + + def _read_data_train(self, data_dir, fold, fold_file): + imnames = listdir_nohidden(data_dir) + imnames.sort() + items = [] + + list_idx = list(range(len(imnames))) + if fold >= 0: + with open(fold_file, "r") as f: + str_idx = f.read().splitlines()[fold] + list_idx = np.fromstring(str_idx, dtype=np.uint8, sep=" ") + + for i in list_idx: + imname = imnames[i] + impath = osp.join(data_dir, imname) + label = osp.splitext(imname)[0].split("_")[1] + label = int(label) + item = Datum(impath=impath, label=label) + items.append(item) + + return items + + def _read_data_all(self, data_dir): + imnames = listdir_nohidden(data_dir) + items = [] + + for imname in imnames: + impath = osp.join(data_dir, imname) + label = osp.splitext(imname)[0].split("_")[1] + if label == "none": + label = -1 + else: + label = int(label) + item = Datum(impath=impath, label=label) + items.append(item) + + return items diff --git a/Dassl.pytorch/dassl/data/datasets/ssl/svhn.py b/Dassl.pytorch/dassl/data/datasets/ssl/svhn.py new file mode 100644 index 0000000000000000000000000000000000000000..15e0de5681aa355830316a75b2fdf8db65be474b --- /dev/null +++ b/Dassl.pytorch/dassl/data/datasets/ssl/svhn.py @@ -0,0 +1,17 @@ +from .cifar import CIFAR10 +from ..build import DATASET_REGISTRY + + +@DATASET_REGISTRY.register() +class SVHN(CIFAR10): + """SVHN for SSL. + + Reference: + - Netzer et al. Reading Digits in Natural Images with + Unsupervised Feature Learning. NIPS-W 2011. 
+ """ + + dataset_dir = "svhn" + + def __init__(self, cfg): + super().__init__(cfg) diff --git a/Dassl.pytorch/dassl/data/samplers.py b/Dassl.pytorch/dassl/data/samplers.py new file mode 100644 index 0000000000000000000000000000000000000000..562bfbcaf25b21a397a407098951a706ccffb45e --- /dev/null +++ b/Dassl.pytorch/dassl/data/samplers.py @@ -0,0 +1,205 @@ +import copy +import numpy as np +import random +from collections import defaultdict +from torch.utils.data.sampler import Sampler, RandomSampler, SequentialSampler + + +class RandomDomainSampler(Sampler): + """Randomly samples N domains each with K images + to form a minibatch of size N*K. + + Args: + data_source (list): list of Datums. + batch_size (int): batch size. + n_domain (int): number of domains to sample in a minibatch. + """ + + def __init__(self, data_source, batch_size, n_domain): + self.data_source = data_source + + # Keep track of image indices for each domain + self.domain_dict = defaultdict(list) + for i, item in enumerate(data_source): + self.domain_dict[item.domain].append(i) + self.domains = list(self.domain_dict.keys()) + + # Make sure each domain has equal number of images + if n_domain is None or n_domain <= 0: + n_domain = len(self.domains) + assert batch_size % n_domain == 0 + self.n_img_per_domain = batch_size // n_domain + + self.batch_size = batch_size + # n_domain denotes number of domains sampled in a minibatch + self.n_domain = n_domain + self.length = len(list(self.__iter__())) + + def __iter__(self): + domain_dict = copy.deepcopy(self.domain_dict) + final_idxs = [] + stop_sampling = False + + while not stop_sampling: + selected_domains = random.sample(self.domains, self.n_domain) + + for domain in selected_domains: + idxs = domain_dict[domain] + selected_idxs = random.sample(idxs, self.n_img_per_domain) + final_idxs.extend(selected_idxs) + + for idx in selected_idxs: + domain_dict[domain].remove(idx) + + remaining = len(domain_dict[domain]) + if remaining < self.n_img_per_domain: + stop_sampling = True + + return iter(final_idxs) + + def __len__(self): + return self.length + + +class SeqDomainSampler(Sampler): + """Sequential domain sampler, which randomly samples K + images from each domain to form a minibatch. + + Args: + data_source (list): list of Datums. + batch_size (int): batch size. 
+ """ + + def __init__(self, data_source, batch_size): + self.data_source = data_source + + # Keep track of image indices for each domain + self.domain_dict = defaultdict(list) + for i, item in enumerate(data_source): + self.domain_dict[item.domain].append(i) + self.domains = list(self.domain_dict.keys()) + self.domains.sort() + + # Make sure each domain has equal number of images + n_domain = len(self.domains) + assert batch_size % n_domain == 0 + self.n_img_per_domain = batch_size // n_domain + + self.batch_size = batch_size + # n_domain denotes number of domains sampled in a minibatch + self.n_domain = n_domain + self.length = len(list(self.__iter__())) + + def __iter__(self): + domain_dict = copy.deepcopy(self.domain_dict) + final_idxs = [] + stop_sampling = False + + while not stop_sampling: + for domain in self.domains: + idxs = domain_dict[domain] + selected_idxs = random.sample(idxs, self.n_img_per_domain) + final_idxs.extend(selected_idxs) + + for idx in selected_idxs: + domain_dict[domain].remove(idx) + + remaining = len(domain_dict[domain]) + if remaining < self.n_img_per_domain: + stop_sampling = True + + return iter(final_idxs) + + def __len__(self): + return self.length + + +class RandomClassSampler(Sampler): + """Randomly samples N classes each with K instances to + form a minibatch of size N*K. + + Modified from https://github.com/KaiyangZhou/deep-person-reid. + + Args: + data_source (list): list of Datums. + batch_size (int): batch size. + n_ins (int): number of instances per class to sample in a minibatch. + """ + + def __init__(self, data_source, batch_size, n_ins): + if batch_size < n_ins: + raise ValueError( + "batch_size={} must be no less " + "than n_ins={}".format(batch_size, n_ins) + ) + + self.data_source = data_source + self.batch_size = batch_size + self.n_ins = n_ins + self.ncls_per_batch = self.batch_size // self.n_ins + self.index_dic = defaultdict(list) + for index, item in enumerate(data_source): + self.index_dic[item.label].append(index) + self.labels = list(self.index_dic.keys()) + assert len(self.labels) >= self.ncls_per_batch + + # estimate number of images in an epoch + self.length = len(list(self.__iter__())) + + def __iter__(self): + batch_idxs_dict = defaultdict(list) + + for label in self.labels: + idxs = copy.deepcopy(self.index_dic[label]) + if len(idxs) < self.n_ins: + idxs = np.random.choice(idxs, size=self.n_ins, replace=True) + random.shuffle(idxs) + batch_idxs = [] + for idx in idxs: + batch_idxs.append(idx) + if len(batch_idxs) == self.n_ins: + batch_idxs_dict[label].append(batch_idxs) + batch_idxs = [] + + avai_labels = copy.deepcopy(self.labels) + final_idxs = [] + + while len(avai_labels) >= self.ncls_per_batch: + selected_labels = random.sample(avai_labels, self.ncls_per_batch) + for label in selected_labels: + batch_idxs = batch_idxs_dict[label].pop(0) + final_idxs.extend(batch_idxs) + if len(batch_idxs_dict[label]) == 0: + avai_labels.remove(label) + + return iter(final_idxs) + + def __len__(self): + return self.length + + +def build_sampler( + sampler_type, + cfg=None, + data_source=None, + batch_size=32, + n_domain=0, + n_ins=16 +): + if sampler_type == "RandomSampler": + return RandomSampler(data_source) + + elif sampler_type == "SequentialSampler": + return SequentialSampler(data_source) + + elif sampler_type == "RandomDomainSampler": + return RandomDomainSampler(data_source, batch_size, n_domain) + + elif sampler_type == "SeqDomainSampler": + return SeqDomainSampler(data_source, batch_size) + + elif sampler_type == 
"RandomClassSampler": + return RandomClassSampler(data_source, batch_size, n_ins) + + else: + raise ValueError("Unknown sampler type: {}".format(sampler_type)) diff --git a/Dassl.pytorch/dassl/data/transforms/__init__.py b/Dassl.pytorch/dassl/data/transforms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..02c05d674f447376d3ac3a559dcb4cb08f59e925 --- /dev/null +++ b/Dassl.pytorch/dassl/data/transforms/__init__.py @@ -0,0 +1 @@ +from .transforms import INTERPOLATION_MODES, build_transform diff --git a/Dassl.pytorch/dassl/data/transforms/autoaugment.py b/Dassl.pytorch/dassl/data/transforms/autoaugment.py new file mode 100644 index 0000000000000000000000000000000000000000..2e14fcee5caab12f1ebaa1333a14c38c32bff1f8 --- /dev/null +++ b/Dassl.pytorch/dassl/data/transforms/autoaugment.py @@ -0,0 +1,273 @@ +""" +Source: https://github.com/DeepVoltaire/AutoAugment +""" +import numpy as np +import random +from PIL import Image, ImageOps, ImageEnhance + + +class ImageNetPolicy: + """Randomly choose one of the best 24 Sub-policies on ImageNet. + + Example: + >>> policy = ImageNetPolicy() + >>> transformed = policy(image) + + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> ImageNetPolicy(), + >>> transforms.ToTensor()]) + """ + + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.4, "posterize", 8, 0.6, "rotate", 9, fillcolor), + SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.6, "posterize", 7, 0.6, "posterize", 6, fillcolor), + SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor), + SubPolicy(0.4, "equalize", 4, 0.8, "rotate", 8, fillcolor), + SubPolicy(0.6, "solarize", 3, 0.6, "equalize", 7, fillcolor), + SubPolicy(0.8, "posterize", 5, 1.0, "equalize", 2, fillcolor), + SubPolicy(0.2, "rotate", 3, 0.6, "solarize", 8, fillcolor), + SubPolicy(0.6, "equalize", 8, 0.4, "posterize", 6, fillcolor), + SubPolicy(0.8, "rotate", 8, 0.4, "color", 0, fillcolor), + SubPolicy(0.4, "rotate", 9, 0.6, "equalize", 2, fillcolor), + SubPolicy(0.0, "equalize", 7, 0.8, "equalize", 8, fillcolor), + SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor), + SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor), + SubPolicy(0.8, "rotate", 8, 1.0, "color", 2, fillcolor), + SubPolicy(0.8, "color", 8, 0.8, "solarize", 7, fillcolor), + SubPolicy(0.4, "sharpness", 7, 0.6, "invert", 8, fillcolor), + SubPolicy(0.6, "shearX", 5, 1.0, "equalize", 9, fillcolor), + SubPolicy(0.4, "color", 0, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor), + SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor), + SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor), + SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor), + ] + + def __call__(self, img): + policy_idx = random.randint(0, len(self.policies) - 1) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment ImageNet Policy" + + +class CIFAR10Policy: + """Randomly choose one of the best 25 Sub-policies on CIFAR10. 
+ + Example: + >>> policy = CIFAR10Policy() + >>> transformed = policy(image) + + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> CIFAR10Policy(), + >>> transforms.ToTensor()]) + """ + + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.1, "invert", 7, 0.2, "contrast", 6, fillcolor), + SubPolicy(0.7, "rotate", 2, 0.3, "translateX", 9, fillcolor), + SubPolicy(0.8, "sharpness", 1, 0.9, "sharpness", 3, fillcolor), + SubPolicy(0.5, "shearY", 8, 0.7, "translateY", 9, fillcolor), + SubPolicy(0.5, "autocontrast", 8, 0.9, "equalize", 2, fillcolor), + SubPolicy(0.2, "shearY", 7, 0.3, "posterize", 7, fillcolor), + SubPolicy(0.4, "color", 3, 0.6, "brightness", 7, fillcolor), + SubPolicy(0.3, "sharpness", 9, 0.7, "brightness", 9, fillcolor), + SubPolicy(0.6, "equalize", 5, 0.5, "equalize", 1, fillcolor), + SubPolicy(0.6, "contrast", 7, 0.6, "sharpness", 5, fillcolor), + SubPolicy(0.7, "color", 7, 0.5, "translateX", 8, fillcolor), + SubPolicy(0.3, "equalize", 7, 0.4, "autocontrast", 8, fillcolor), + SubPolicy(0.4, "translateY", 3, 0.2, "sharpness", 6, fillcolor), + SubPolicy(0.9, "brightness", 6, 0.2, "color", 8, fillcolor), + SubPolicy(0.5, "solarize", 2, 0.0, "invert", 3, fillcolor), + SubPolicy(0.2, "equalize", 0, 0.6, "autocontrast", 0, fillcolor), + SubPolicy(0.2, "equalize", 8, 0.6, "equalize", 4, fillcolor), + SubPolicy(0.9, "color", 9, 0.6, "equalize", 6, fillcolor), + SubPolicy(0.8, "autocontrast", 4, 0.2, "solarize", 8, fillcolor), + SubPolicy(0.1, "brightness", 3, 0.7, "color", 0, fillcolor), + SubPolicy(0.4, "solarize", 5, 0.9, "autocontrast", 3, fillcolor), + SubPolicy(0.9, "translateY", 9, 0.7, "translateY", 9, fillcolor), + SubPolicy(0.9, "autocontrast", 2, 0.8, "solarize", 3, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.1, "invert", 3, fillcolor), + SubPolicy(0.7, "translateY", 9, 0.9, "autocontrast", 1, fillcolor), + ] + + def __call__(self, img): + policy_idx = random.randint(0, len(self.policies) - 1) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment CIFAR10 Policy" + + +class SVHNPolicy: + """Randomly choose one of the best 25 Sub-policies on SVHN. 
+ + Example: + >>> policy = SVHNPolicy() + >>> transformed = policy(image) + + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> SVHNPolicy(), + >>> transforms.ToTensor()]) + """ + + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.9, "shearX", 4, 0.2, "invert", 3, fillcolor), + SubPolicy(0.9, "shearY", 8, 0.7, "invert", 5, fillcolor), + SubPolicy(0.6, "equalize", 5, 0.6, "solarize", 6, fillcolor), + SubPolicy(0.9, "invert", 3, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.6, "equalize", 1, 0.9, "rotate", 3, fillcolor), + SubPolicy(0.9, "shearX", 4, 0.8, "autocontrast", 3, fillcolor), + SubPolicy(0.9, "shearY", 8, 0.4, "invert", 5, fillcolor), + SubPolicy(0.9, "shearY", 5, 0.2, "solarize", 6, fillcolor), + SubPolicy(0.9, "invert", 6, 0.8, "autocontrast", 1, fillcolor), + SubPolicy(0.6, "equalize", 3, 0.9, "rotate", 3, fillcolor), + SubPolicy(0.9, "shearX", 4, 0.3, "solarize", 3, fillcolor), + SubPolicy(0.8, "shearY", 8, 0.7, "invert", 4, fillcolor), + SubPolicy(0.9, "equalize", 5, 0.6, "translateY", 6, fillcolor), + SubPolicy(0.9, "invert", 4, 0.6, "equalize", 7, fillcolor), + SubPolicy(0.3, "contrast", 3, 0.8, "rotate", 4, fillcolor), + SubPolicy(0.8, "invert", 5, 0.0, "translateY", 2, fillcolor), + SubPolicy(0.7, "shearY", 6, 0.4, "solarize", 8, fillcolor), + SubPolicy(0.6, "invert", 4, 0.8, "rotate", 4, fillcolor), + SubPolicy(0.3, "shearY", 7, 0.9, "translateX", 3, fillcolor), + SubPolicy(0.1, "shearX", 6, 0.6, "invert", 5, fillcolor), + SubPolicy(0.7, "solarize", 2, 0.6, "translateY", 7, fillcolor), + SubPolicy(0.8, "shearY", 4, 0.8, "invert", 8, fillcolor), + SubPolicy(0.7, "shearX", 9, 0.8, "translateY", 3, fillcolor), + SubPolicy(0.8, "shearY", 5, 0.7, "autocontrast", 3, fillcolor), + SubPolicy(0.7, "shearX", 2, 0.1, "invert", 5, fillcolor), + ] + + def __call__(self, img): + policy_idx = random.randint(0, len(self.policies) - 1) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment SVHN Policy" + + +class SubPolicy(object): + + def __init__( + self, + p1, + operation1, + magnitude_idx1, + p2, + operation2, + magnitude_idx2, + fillcolor=(128, 128, 128), + ): + ranges = { + "shearX": np.linspace(0, 0.3, 10), + "shearY": np.linspace(0, 0.3, 10), + "translateX": np.linspace(0, 150 / 331, 10), + "translateY": np.linspace(0, 150 / 331, 10), + "rotate": np.linspace(0, 30, 10), + "color": np.linspace(0.0, 0.9, 10), + "posterize": np.round(np.linspace(8, 4, 10), 0).astype(np.int), + "solarize": np.linspace(256, 0, 10), + "contrast": np.linspace(0.0, 0.9, 10), + "sharpness": np.linspace(0.0, 0.9, 10), + "brightness": np.linspace(0.0, 0.9, 10), + "autocontrast": [0] * 10, + "equalize": [0] * 10, + "invert": [0] * 10, + } + + # from https://stackoverflow.com/questions/5252170/specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand + def rotate_with_fill(img, magnitude): + rot = img.convert("RGBA").rotate(magnitude) + return Image.composite( + rot, Image.new("RGBA", rot.size, (128, ) * 4), rot + ).convert(img.mode) + + func = { + "shearX": + lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0), + Image.BICUBIC, + fillcolor=fillcolor, + ), + "shearY": + lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0), + Image.BICUBIC, + fillcolor=fillcolor, + ), + "translateX": + lambda img, magnitude: img.transform( + img.size, + 
Image.AFFINE, + ( + 1, 0, magnitude * img.size[0] * random.choice([-1, 1]), 0, + 1, 0 + ), + fillcolor=fillcolor, + ), + "translateY": + lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + ( + 1, 0, 0, 0, 1, magnitude * img.size[1] * random. + choice([-1, 1]) + ), + fillcolor=fillcolor, + ), + "rotate": + lambda img, magnitude: rotate_with_fill(img, magnitude), + "color": + lambda img, magnitude: ImageEnhance.Color(img). + enhance(1 + magnitude * random.choice([-1, 1])), + "posterize": + lambda img, magnitude: ImageOps.posterize(img, magnitude), + "solarize": + lambda img, magnitude: ImageOps.solarize(img, magnitude), + "contrast": + lambda img, magnitude: ImageEnhance.Contrast(img). + enhance(1 + magnitude * random.choice([-1, 1])), + "sharpness": + lambda img, magnitude: ImageEnhance.Sharpness(img). + enhance(1 + magnitude * random.choice([-1, 1])), + "brightness": + lambda img, magnitude: ImageEnhance.Brightness(img). + enhance(1 + magnitude * random.choice([-1, 1])), + "autocontrast": + lambda img, magnitude: ImageOps.autocontrast(img), + "equalize": + lambda img, magnitude: ImageOps.equalize(img), + "invert": + lambda img, magnitude: ImageOps.invert(img), + } + + self.p1 = p1 + self.operation1 = func[operation1] + self.magnitude1 = ranges[operation1][magnitude_idx1] + self.p2 = p2 + self.operation2 = func[operation2] + self.magnitude2 = ranges[operation2][magnitude_idx2] + + def __call__(self, img): + if random.random() < self.p1: + img = self.operation1(img, self.magnitude1) + if random.random() < self.p2: + img = self.operation2(img, self.magnitude2) + return img diff --git a/Dassl.pytorch/dassl/data/transforms/randaugment.py b/Dassl.pytorch/dassl/data/transforms/randaugment.py new file mode 100644 index 0000000000000000000000000000000000000000..5c39ff3ee4a8af917086e160aa9c3e5ed51cba17 --- /dev/null +++ b/Dassl.pytorch/dassl/data/transforms/randaugment.py @@ -0,0 +1,363 @@ +""" +Credit to +1) https://github.com/ildoonet/pytorch-randaugment +2) https://github.com/kakaobrain/fast-autoaugment +""" +import numpy as np +import random +import PIL +import torch +import PIL.ImageOps +import PIL.ImageDraw +import PIL.ImageEnhance +from PIL import Image + + +def ShearX(img, v): + assert -0.3 <= v <= 0.3 + if random.random() > 0.5: + v = -v + return img.transform(img.size, PIL.Image.AFFINE, (1, v, 0, 0, 1, 0)) + + +def ShearY(img, v): + assert -0.3 <= v <= 0.3 + if random.random() > 0.5: + v = -v + return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, v, 1, 0)) + + +def TranslateX(img, v): + # [-150, 150] => percentage: [-0.45, 0.45] + assert -0.45 <= v <= 0.45 + if random.random() > 0.5: + v = -v + v = v * img.size[0] + return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0)) + + +def TranslateXabs(img, v): + # [-150, 150] => percentage: [-0.45, 0.45] + assert 0 <= v + if random.random() > 0.5: + v = -v + return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0)) + + +def TranslateY(img, v): + # [-150, 150] => percentage: [-0.45, 0.45] + assert -0.45 <= v <= 0.45 + if random.random() > 0.5: + v = -v + v = v * img.size[1] + return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v)) + + +def TranslateYabs(img, v): + # [-150, 150] => percentage: [-0.45, 0.45] + assert 0 <= v + if random.random() > 0.5: + v = -v + return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v)) + + +def Rotate(img, v): + assert -30 <= v <= 30 + if random.random() > 0.5: + v = -v + return img.rotate(v) + + +def AutoContrast(img, _): + return 
PIL.ImageOps.autocontrast(img) + + +def Invert(img, _): + return PIL.ImageOps.invert(img) + + +def Equalize(img, _): + return PIL.ImageOps.equalize(img) + + +def Flip(img, _): + return PIL.ImageOps.mirror(img) + + +def Solarize(img, v): + assert 0 <= v <= 256 + return PIL.ImageOps.solarize(img, v) + + +def SolarizeAdd(img, addition=0, threshold=128): + img_np = np.array(img).astype(np.int) + img_np = img_np + addition + img_np = np.clip(img_np, 0, 255) + img_np = img_np.astype(np.uint8) + img = Image.fromarray(img_np) + return PIL.ImageOps.solarize(img, threshold) + + +def Posterize(img, v): + assert 4 <= v <= 8 + v = int(v) + return PIL.ImageOps.posterize(img, v) + + +def Contrast(img, v): + assert 0.0 <= v <= 2.0 + return PIL.ImageEnhance.Contrast(img).enhance(v) + + +def Color(img, v): + assert 0.0 <= v <= 2.0 + return PIL.ImageEnhance.Color(img).enhance(v) + + +def Brightness(img, v): + assert 0.0 <= v <= 2.0 + return PIL.ImageEnhance.Brightness(img).enhance(v) + + +def Sharpness(img, v): + assert 0.0 <= v <= 2.0 + return PIL.ImageEnhance.Sharpness(img).enhance(v) + + +def Cutout(img, v): + # [0, 60] => percentage: [0, 0.2] + assert 0.0 <= v <= 0.2 + if v <= 0.0: + return img + + v = v * img.size[0] + return CutoutAbs(img, v) + + +def CutoutAbs(img, v): + # [0, 60] => percentage: [0, 0.2] + # assert 0 <= v <= 20 + if v < 0: + return img + w, h = img.size + x0 = np.random.uniform(w) + y0 = np.random.uniform(h) + + x0 = int(max(0, x0 - v/2.0)) + y0 = int(max(0, y0 - v/2.0)) + x1 = min(w, x0 + v) + y1 = min(h, y0 + v) + + xy = (x0, y0, x1, y1) + color = (125, 123, 114) + # color = (0, 0, 0) + img = img.copy() + PIL.ImageDraw.Draw(img).rectangle(xy, color) + return img + + +def SamplePairing(imgs): + # [0, 0.4] + def f(img1, v): + i = np.random.choice(len(imgs)) + img2 = PIL.Image.fromarray(imgs[i]) + return PIL.Image.blend(img1, img2, v) + + return f + + +def Identity(img, v): + return img + + +class Lighting: + """Lighting noise (AlexNet - style PCA - based noise).""" + + def __init__(self, alphastd, eigval, eigvec): + self.alphastd = alphastd + self.eigval = torch.Tensor(eigval) + self.eigvec = torch.Tensor(eigvec) + + def __call__(self, img): + if self.alphastd == 0: + return img + + alpha = img.new().resize_(3).normal_(0, self.alphastd) + rgb = ( + self.eigvec.type_as(img).clone().mul( + alpha.view(1, 3).expand(3, 3) + ).mul(self.eigval.view(1, 3).expand(3, 3)).sum(1).squeeze() + ) + + return img.add(rgb.view(3, 1, 1).expand_as(img)) + + +class CutoutDefault: + """ + Reference : https://github.com/quark0/darts/blob/master/cnn/utils.py + """ + + def __init__(self, length): + self.length = length + + def __call__(self, img): + h, w = img.size(1), img.size(2) + mask = np.ones((h, w), np.float32) + y = np.random.randint(h) + x = np.random.randint(w) + + y1 = np.clip(y - self.length // 2, 0, h) + y2 = np.clip(y + self.length // 2, 0, h) + x1 = np.clip(x - self.length // 2, 0, w) + x2 = np.clip(x + self.length // 2, 0, w) + + mask[y1:y2, x1:x2] = 0.0 + mask = torch.from_numpy(mask) + mask = mask.expand_as(img) + img *= mask + return img + + +def randaugment_list(): + # 16 oeprations and their ranges + # https://github.com/google-research/uda/blob/master/image/randaugment/policies.py#L57 + # augs = [ + # (Identity, 0., 1.0), + # (ShearX, 0., 0.3), # 0 + # (ShearY, 0., 0.3), # 1 + # (TranslateX, 0., 0.33), # 2 + # (TranslateY, 0., 0.33), # 3 + # (Rotate, 0, 30), # 4 + # (AutoContrast, 0, 1), # 5 + # (Invert, 0, 1), # 6 + # (Equalize, 0, 1), # 7 + # (Solarize, 0, 110), # 8 + # (Posterize, 4, 
8), # 9 + # # (Contrast, 0.1, 1.9), # 10 + # (Color, 0.1, 1.9), # 11 + # (Brightness, 0.1, 1.9), # 12 + # (Sharpness, 0.1, 1.9), # 13 + # # (Cutout, 0, 0.2), # 14 + # # (SamplePairing(imgs), 0, 0.4) # 15 + # ] + + # https://github.com/tensorflow/tpu/blob/8462d083dd89489a79e3200bcc8d4063bf362186/models/official/efficientnet/autoaugment.py#L505 + augs = [ + (AutoContrast, 0, 1), + (Equalize, 0, 1), + (Invert, 0, 1), + (Rotate, 0, 30), + (Posterize, 4, 8), + (Solarize, 0, 256), + (SolarizeAdd, 0, 110), + (Color, 0.1, 1.9), + (Contrast, 0.1, 1.9), + (Brightness, 0.1, 1.9), + (Sharpness, 0.1, 1.9), + (ShearX, 0.0, 0.3), + (ShearY, 0.0, 0.3), + (CutoutAbs, 0, 40), + (TranslateXabs, 0.0, 100), + (TranslateYabs, 0.0, 100), + ] + + return augs + + +def randaugment_list2(): + augs = [ + (AutoContrast, 0, 1), + (Brightness, 0.1, 1.9), + (Color, 0.1, 1.9), + (Contrast, 0.1, 1.9), + (Equalize, 0, 1), + (Identity, 0, 1), + (Invert, 0, 1), + (Posterize, 4, 8), + (Rotate, -30, 30), + (Sharpness, 0.1, 1.9), + (ShearX, -0.3, 0.3), + (ShearY, -0.3, 0.3), + (Solarize, 0, 256), + (TranslateX, -0.3, 0.3), + (TranslateY, -0.3, 0.3), + ] + + return augs + + +def fixmatch_list(): + # https://arxiv.org/abs/2001.07685 + augs = [ + (AutoContrast, 0, 1), + (Brightness, 0.05, 0.95), + (Color, 0.05, 0.95), + (Contrast, 0.05, 0.95), + (Equalize, 0, 1), + (Identity, 0, 1), + (Posterize, 4, 8), + (Rotate, -30, 30), + (Sharpness, 0.05, 0.95), + (ShearX, -0.3, 0.3), + (ShearY, -0.3, 0.3), + (Solarize, 0, 256), + (TranslateX, -0.3, 0.3), + (TranslateY, -0.3, 0.3), + ] + + return augs + + +class RandAugment: + + def __init__(self, n=2, m=10): + assert 0 <= m <= 30 + self.n = n + self.m = m + self.augment_list = randaugment_list() + + def __call__(self, img): + ops = random.choices(self.augment_list, k=self.n) + + for op, minval, maxval in ops: + val = (self.m / 30) * (maxval-minval) + minval + img = op(img, val) + + return img + + +class RandAugment2: + + def __init__(self, n=2, p=0.6): + self.n = n + self.p = p + self.augment_list = randaugment_list2() + + def __call__(self, img): + ops = random.choices(self.augment_list, k=self.n) + + for op, minval, maxval in ops: + if random.random() > self.p: + continue + m = random.random() + val = m * (maxval-minval) + minval + img = op(img, val) + + return img + + +class RandAugmentFixMatch: + + def __init__(self, n=2): + self.n = n + self.augment_list = fixmatch_list() + + def __call__(self, img): + ops = random.choices(self.augment_list, k=self.n) + + for op, minval, maxval in ops: + m = random.random() + val = m * (maxval-minval) + minval + img = op(img, val) + + return img diff --git a/Dassl.pytorch/dassl/data/transforms/transforms.py b/Dassl.pytorch/dassl/data/transforms/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..4ea00d6eba549cc40c863438c262ec0f77f2e7b8 --- /dev/null +++ b/Dassl.pytorch/dassl/data/transforms/transforms.py @@ -0,0 +1,354 @@ +import numpy as np +import random +import torch +import torchvision.transforms.functional as F +from torchvision.transforms import ( + Resize, Compose, ToTensor, Normalize, CenterCrop, RandomCrop, ColorJitter, + RandomApply, GaussianBlur, RandomGrayscale, RandomResizedCrop, + RandomHorizontalFlip +) +from torchvision.transforms.functional import InterpolationMode + +from .autoaugment import SVHNPolicy, CIFAR10Policy, ImageNetPolicy +from .randaugment import RandAugment, RandAugment2, RandAugmentFixMatch + +AVAI_CHOICES = [ + "random_flip", + "random_resized_crop", + "normalize", + "instance_norm", + 
"random_crop", + "random_translation", + "center_crop", # This has become a default operation during testing + "cutout", + "imagenet_policy", + "cifar10_policy", + "svhn_policy", + "randaugment", + "randaugment_fixmatch", + "randaugment2", + "gaussian_noise", + "colorjitter", + "randomgrayscale", + "gaussian_blur", +] + +INTERPOLATION_MODES = { + "bilinear": InterpolationMode.BILINEAR, + "bicubic": InterpolationMode.BICUBIC, + "nearest": InterpolationMode.NEAREST, +} + + +class Random2DTranslation: + """Given an image of (height, width), we resize it to + (height*1.125, width*1.125), and then perform random cropping. + + Args: + height (int): target image height. + width (int): target image width. + p (float, optional): probability that this operation takes place. + Default is 0.5. + interpolation (int, optional): desired interpolation. Default is + ``torchvision.transforms.functional.InterpolationMode.BILINEAR`` + """ + + def __init__( + self, height, width, p=0.5, interpolation=InterpolationMode.BILINEAR + ): + self.height = height + self.width = width + self.p = p + self.interpolation = interpolation + + def __call__(self, img): + if random.uniform(0, 1) > self.p: + return F.resize( + img=img, + size=[self.height, self.width], + interpolation=self.interpolation + ) + + new_width = int(round(self.width * 1.125)) + new_height = int(round(self.height * 1.125)) + resized_img = F.resize( + img=img, + size=[new_height, new_width], + interpolation=self.interpolation + ) + x_maxrange = new_width - self.width + y_maxrange = new_height - self.height + x1 = int(round(random.uniform(0, x_maxrange))) + y1 = int(round(random.uniform(0, y_maxrange))) + croped_img = F.crop( + img=resized_img, + top=y1, + left=x1, + height=self.height, + width=self.width + ) + + return croped_img + + +class InstanceNormalization: + """Normalize data using per-channel mean and standard deviation. + + Reference: + - Ulyanov et al. Instance normalization: The missing in- gredient + for fast stylization. ArXiv 2016. + - Shu et al. A DIRT-T Approach to Unsupervised Domain Adaptation. + ICLR 2018. + """ + + def __init__(self, eps=1e-8): + self.eps = eps + + def __call__(self, img): + C, H, W = img.shape + img_re = img.reshape(C, H * W) + mean = img_re.mean(1).view(C, 1, 1) + std = img_re.std(1).view(C, 1, 1) + return (img-mean) / (std + self.eps) + + +class Cutout: + """Randomly mask out one or more patches from an image. + + https://github.com/uoguelph-mlrg/Cutout + + Args: + n_holes (int, optional): number of patches to cut out + of each image. Default is 1. + length (int, optinal): length (in pixels) of each square + patch. Default is 16. + """ + + def __init__(self, n_holes=1, length=16): + self.n_holes = n_holes + self.length = length + + def __call__(self, img): + """ + Args: + img (Tensor): tensor image of size (C, H, W). + + Returns: + Tensor: image with n_holes of dimension + length x length cut out of it. 
+ """ + h = img.size(1) + w = img.size(2) + + mask = np.ones((h, w), np.float32) + + for n in range(self.n_holes): + y = np.random.randint(h) + x = np.random.randint(w) + + y1 = np.clip(y - self.length // 2, 0, h) + y2 = np.clip(y + self.length // 2, 0, h) + x1 = np.clip(x - self.length // 2, 0, w) + x2 = np.clip(x + self.length // 2, 0, w) + + mask[y1:y2, x1:x2] = 0.0 + + mask = torch.from_numpy(mask) + mask = mask.expand_as(img) + return img * mask + + +class GaussianNoise: + """Add gaussian noise.""" + + def __init__(self, mean=0, std=0.15, p=0.5): + self.mean = mean + self.std = std + self.p = p + + def __call__(self, img): + if random.uniform(0, 1) > self.p: + return img + noise = torch.randn(img.size()) * self.std + self.mean + return img + noise + + +def build_transform(cfg, is_train=True, choices=None): + """Build transformation function. + + Args: + cfg (CfgNode): config. + is_train (bool, optional): for training (True) or test (False). + Default is True. + choices (list, optional): list of strings which will overwrite + cfg.INPUT.TRANSFORMS if given. Default is None. + """ + if cfg.INPUT.NO_TRANSFORM: + print("Note: no transform is applied!") + return None + + if choices is None: + choices = cfg.INPUT.TRANSFORMS + + for choice in choices: + assert choice in AVAI_CHOICES + + target_size = f"{cfg.INPUT.SIZE[0]}x{cfg.INPUT.SIZE[1]}" + + normalize = Normalize(mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD) + + if is_train: + return _build_transform_train(cfg, choices, target_size, normalize) + else: + return _build_transform_test(cfg, choices, target_size, normalize) + + +def _build_transform_train(cfg, choices, target_size, normalize): + print("Building transform_train") + tfm_train = [] + + interp_mode = INTERPOLATION_MODES[cfg.INPUT.INTERPOLATION] + input_size = cfg.INPUT.SIZE + + # Make sure the image size matches the target size + conditions = [] + conditions += ["random_crop" not in choices] + conditions += ["random_resized_crop" not in choices] + if all(conditions): + print(f"+ resize to {target_size}") + tfm_train += [Resize(input_size, interpolation=interp_mode)] + + if "random_translation" in choices: + print("+ random translation") + tfm_train += [Random2DTranslation(input_size[0], input_size[1])] + + if "random_crop" in choices: + crop_padding = cfg.INPUT.CROP_PADDING + print(f"+ random crop (padding = {crop_padding})") + tfm_train += [RandomCrop(input_size, padding=crop_padding)] + + if "random_resized_crop" in choices: + s_ = cfg.INPUT.RRCROP_SCALE + print(f"+ random resized crop (size={input_size}, scale={s_})") + tfm_train += [ + RandomResizedCrop(input_size, scale=s_, interpolation=interp_mode) + ] + + if "random_flip" in choices: + print("+ random flip") + tfm_train += [RandomHorizontalFlip()] + + if "imagenet_policy" in choices: + print("+ imagenet policy") + tfm_train += [ImageNetPolicy()] + + if "cifar10_policy" in choices: + print("+ cifar10 policy") + tfm_train += [CIFAR10Policy()] + + if "svhn_policy" in choices: + print("+ svhn policy") + tfm_train += [SVHNPolicy()] + + if "randaugment" in choices: + n_ = cfg.INPUT.RANDAUGMENT_N + m_ = cfg.INPUT.RANDAUGMENT_M + print(f"+ randaugment (n={n_}, m={m_})") + tfm_train += [RandAugment(n_, m_)] + + if "randaugment_fixmatch" in choices: + n_ = cfg.INPUT.RANDAUGMENT_N + print(f"+ randaugment_fixmatch (n={n_})") + tfm_train += [RandAugmentFixMatch(n_)] + + if "randaugment2" in choices: + n_ = cfg.INPUT.RANDAUGMENT_N + print(f"+ randaugment2 (n={n_})") + tfm_train += [RandAugment2(n_)] + + if "colorjitter" in 
choices: + b_ = cfg.INPUT.COLORJITTER_B + c_ = cfg.INPUT.COLORJITTER_C + s_ = cfg.INPUT.COLORJITTER_S + h_ = cfg.INPUT.COLORJITTER_H + print( + f"+ color jitter (brightness={b_}, " + f"contrast={c_}, saturation={s_}, hue={h_})" + ) + tfm_train += [ + ColorJitter( + brightness=b_, + contrast=c_, + saturation=s_, + hue=h_, + ) + ] + + if "randomgrayscale" in choices: + print("+ random gray scale") + tfm_train += [RandomGrayscale(p=cfg.INPUT.RGS_P)] + + if "gaussian_blur" in choices: + print(f"+ gaussian blur (kernel={cfg.INPUT.GB_K})") + gb_k, gb_p = cfg.INPUT.GB_K, cfg.INPUT.GB_P + tfm_train += [RandomApply([GaussianBlur(gb_k)], p=gb_p)] + + print("+ to torch tensor of range [0, 1]") + tfm_train += [ToTensor()] + + if "cutout" in choices: + cutout_n = cfg.INPUT.CUTOUT_N + cutout_len = cfg.INPUT.CUTOUT_LEN + print(f"+ cutout (n_holes={cutout_n}, length={cutout_len})") + tfm_train += [Cutout(cutout_n, cutout_len)] + + if "normalize" in choices: + print( + f"+ normalization (mean={cfg.INPUT.PIXEL_MEAN}, std={cfg.INPUT.PIXEL_STD})" + ) + tfm_train += [normalize] + + if "gaussian_noise" in choices: + print( + f"+ gaussian noise (mean={cfg.INPUT.GN_MEAN}, std={cfg.INPUT.GN_STD})" + ) + tfm_train += [GaussianNoise(cfg.INPUT.GN_MEAN, cfg.INPUT.GN_STD)] + + if "instance_norm" in choices: + print("+ instance normalization") + tfm_train += [InstanceNormalization()] + + tfm_train = Compose(tfm_train) + + return tfm_train + + +def _build_transform_test(cfg, choices, target_size, normalize): + print("Building transform_test") + tfm_test = [] + + interp_mode = INTERPOLATION_MODES[cfg.INPUT.INTERPOLATION] + input_size = cfg.INPUT.SIZE + + print(f"+ resize the smaller edge to {max(input_size)}") + tfm_test += [Resize(max(input_size), interpolation=interp_mode)] + + print(f"+ {target_size} center crop") + tfm_test += [CenterCrop(input_size)] + + print("+ to torch tensor of range [0, 1]") + tfm_test += [ToTensor()] + + if "normalize" in choices: + print( + f"+ normalization (mean={cfg.INPUT.PIXEL_MEAN}, std={cfg.INPUT.PIXEL_STD})" + ) + tfm_test += [normalize] + + if "instance_norm" in choices: + print("+ instance normalization") + tfm_test += [InstanceNormalization()] + + tfm_test = Compose(tfm_test) + + return tfm_test diff --git a/Dassl.pytorch/dassl/engine/__init__.py b/Dassl.pytorch/dassl/engine/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3cb3fb86028e28f1d807c4454f77eb2bae6c0aa8 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/__init__.py @@ -0,0 +1,6 @@ +from .build import TRAINER_REGISTRY, build_trainer # isort:skip +from .trainer import TrainerX, TrainerXU, TrainerBase, SimpleTrainer, SimpleNet # isort:skip + +from .da import * +from .dg import * +from .ssl import * diff --git a/Dassl.pytorch/dassl/engine/build.py b/Dassl.pytorch/dassl/engine/build.py new file mode 100644 index 0000000000000000000000000000000000000000..456ba8dde739ccad44a14317ca2991dbef32db18 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/build.py @@ -0,0 +1,11 @@ +from dassl.utils import Registry, check_availability + +TRAINER_REGISTRY = Registry("TRAINER") + + +def build_trainer(cfg): + avai_trainers = TRAINER_REGISTRY.registered_names() + check_availability(cfg.TRAINER.NAME, avai_trainers) + if cfg.VERBOSE: + print("Loading trainer: {}".format(cfg.TRAINER.NAME)) + return TRAINER_REGISTRY.get(cfg.TRAINER.NAME)(cfg) diff --git a/Dassl.pytorch/dassl/engine/da/__init__.py b/Dassl.pytorch/dassl/engine/da/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..910bf34b99b86dab9be09c59b84311401aaf2dbf --- /dev/null +++ b/Dassl.pytorch/dassl/engine/da/__init__.py @@ -0,0 +1,10 @@ +from .se import SE +from .mcd import MCD +from .mme import MME +from .adda import ADDA +from .cdac import CDAC +from .dael import DAEL +from .dann import DANN +from .adabn import AdaBN +from .m3sda import M3SDA +from .source_only import SourceOnly diff --git a/Dassl.pytorch/dassl/engine/da/adabn.py b/Dassl.pytorch/dassl/engine/da/adabn.py new file mode 100644 index 0000000000000000000000000000000000000000..116d8a21183cdd4d476c1c40bc1aec7a155f5c7f --- /dev/null +++ b/Dassl.pytorch/dassl/engine/da/adabn.py @@ -0,0 +1,38 @@ +import torch + +from dassl.utils import check_isfile +from dassl.engine import TRAINER_REGISTRY, TrainerXU + + +@TRAINER_REGISTRY.register() +class AdaBN(TrainerXU): + """Adaptive Batch Normalization. + + https://arxiv.org/abs/1603.04779. + """ + + def __init__(self, cfg): + super().__init__(cfg) + self.done_reset_bn_stats = False + + def check_cfg(self, cfg): + assert check_isfile( + cfg.MODEL.INIT_WEIGHTS + ), "The weights of source model must be provided" + + def before_epoch(self): + if not self.done_reset_bn_stats: + for m in self.model.modules(): + classname = m.__class__.__name__ + if classname.find("BatchNorm") != -1: + m.reset_running_stats() + + self.done_reset_bn_stats = True + + def forward_backward(self, batch_x, batch_u): + input_u = batch_u["img"].to(self.device) + + with torch.no_grad(): + self.model(input_u) + + return None diff --git a/Dassl.pytorch/dassl/engine/da/adda.py b/Dassl.pytorch/dassl/engine/da/adda.py new file mode 100644 index 0000000000000000000000000000000000000000..a9018e7861237a5bd3bdadf1cc4ea656ea5fb7b1 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/da/adda.py @@ -0,0 +1,85 @@ +import copy +import torch +import torch.nn as nn + +from dassl.optim import build_optimizer, build_lr_scheduler +from dassl.utils import check_isfile, count_num_param, open_specified_layers +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.modeling import build_head + + +@TRAINER_REGISTRY.register() +class ADDA(TrainerXU): + """Adversarial Discriminative Domain Adaptation. + + https://arxiv.org/abs/1702.05464. 
+ """ + + def __init__(self, cfg): + super().__init__(cfg) + self.open_layers = ["backbone"] + if isinstance(self.model.head, nn.Module): + self.open_layers.append("head") + + self.source_model = copy.deepcopy(self.model) + self.source_model.eval() + for param in self.source_model.parameters(): + param.requires_grad_(False) + + self.build_critic() + + self.bce = nn.BCEWithLogitsLoss() + + def check_cfg(self, cfg): + assert check_isfile( + cfg.MODEL.INIT_WEIGHTS + ), "The weights of source model must be provided" + + def build_critic(self): + cfg = self.cfg + + print("Building critic network") + fdim = self.model.fdim + critic_body = build_head( + "mlp", + verbose=cfg.VERBOSE, + in_features=fdim, + hidden_layers=[fdim, fdim // 2], + activation="leaky_relu", + ) + self.critic = nn.Sequential(critic_body, nn.Linear(fdim // 2, 1)) + print("# params: {:,}".format(count_num_param(self.critic))) + self.critic.to(self.device) + self.optim_c = build_optimizer(self.critic, cfg.OPTIM) + self.sched_c = build_lr_scheduler(self.optim_c, cfg.OPTIM) + self.register_model("critic", self.critic, self.optim_c, self.sched_c) + + def forward_backward(self, batch_x, batch_u): + open_specified_layers(self.model, self.open_layers) + input_x, _, input_u = self.parse_batch_train(batch_x, batch_u) + domain_x = torch.ones(input_x.shape[0], 1).to(self.device) + domain_u = torch.zeros(input_u.shape[0], 1).to(self.device) + + _, feat_x = self.source_model(input_x, return_feature=True) + _, feat_u = self.model(input_u, return_feature=True) + + logit_xd = self.critic(feat_x) + logit_ud = self.critic(feat_u.detach()) + + loss_critic = self.bce(logit_xd, domain_x) + loss_critic += self.bce(logit_ud, domain_u) + self.model_backward_and_update(loss_critic, "critic") + + logit_ud = self.critic(feat_u) + loss_model = self.bce(logit_ud, 1 - domain_u) + self.model_backward_and_update(loss_model, "model") + + loss_summary = { + "loss_critic": loss_critic.item(), + "loss_model": loss_model.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary diff --git a/Dassl.pytorch/dassl/engine/da/cdac.py b/Dassl.pytorch/dassl/engine/da/cdac.py new file mode 100644 index 0000000000000000000000000000000000000000..ed846597d3cfc32b8bc65c2d0f439dde7403e064 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/da/cdac.py @@ -0,0 +1,275 @@ +import numpy as np +from functools import partial +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.optim.lr_scheduler import LambdaLR + +from dassl.data import DataManager +from dassl.optim import build_optimizer +from dassl.utils import count_num_param +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.metrics import compute_accuracy +from dassl.modeling.ops import ReverseGrad +from dassl.engine.trainer import SimpleNet +from dassl.data.transforms.transforms import build_transform + + +def custom_scheduler(iter, max_iter=None, alpha=10, beta=0.75, init_lr=0.001): + """Custom LR Annealing + + https://arxiv.org/pdf/1409.7495.pdf + """ + if max_iter is None: + return init_lr + return (1 + float(iter / max_iter) * alpha)**(-1.0 * beta) + + +class AAC(nn.Module): + + def forward(self, sim_mat, prob_u, prob_us): + + P = prob_u.matmul(prob_us.t()) + + loss = -( + sim_mat * torch.log(P + 1e-7) + + (1.-sim_mat) * torch.log(1. 
- P + 1e-7) + ) + return loss.mean() + + +class Prototypes(nn.Module): + + def __init__(self, fdim, num_classes, temp=0.05): + super().__init__() + self.prototypes = nn.Linear(fdim, num_classes, bias=False) + self.temp = temp + self.revgrad = ReverseGrad() + + def forward(self, x, reverse=False): + if reverse: + x = self.revgrad(x) + x = F.normalize(x, p=2, dim=1) + out = self.prototypes(x) + out = out / self.temp + return out + + +@TRAINER_REGISTRY.register() +class CDAC(TrainerXU): + """Cross Domain Adaptive Clustering. + + https://arxiv.org/pdf/2104.09415.pdf + """ + + def __init__(self, cfg): + self.rampup_coef = cfg.TRAINER.CDAC.RAMPUP_COEF + self.rampup_iters = cfg.TRAINER.CDAC.RAMPUP_ITRS + self.lr_multi = cfg.TRAINER.CDAC.CLASS_LR_MULTI + self.topk = cfg.TRAINER.CDAC.TOPK_MATCH + self.p_thresh = cfg.TRAINER.CDAC.P_THRESH + self.aac_criterion = AAC() + super().__init__(cfg) + + def check_cfg(self, cfg): + assert len( + cfg.TRAINER.CDAC.STRONG_TRANSFORMS + ) > 0, "Strong augmentations are necessary to run CDAC" + assert cfg.DATALOADER.K_TRANSFORMS == 2, "CDAC needs two strong augmentations of the same image." + + def build_data_loader(self): + + cfg = self.cfg + tfm_train = build_transform(cfg, is_train=True) + custom_tfm_train = [tfm_train] + choices = cfg.TRAINER.CDAC.STRONG_TRANSFORMS + tfm_train_strong = build_transform(cfg, is_train=True, choices=choices) + custom_tfm_train += [tfm_train_strong] + self.dm = DataManager(self.cfg, custom_tfm_train=custom_tfm_train) + self.train_loader_x = self.dm.train_loader_x + self.train_loader_u = self.dm.train_loader_u + self.val_loader = self.dm.val_loader + self.test_loader = self.dm.test_loader + self.num_classes = self.dm.num_classes + self.lab2cname = self.dm.lab2cname + + def build_model(self): + cfg = self.cfg + + # Custom LR Scheduler for CDAC + if self.cfg.TRAIN.COUNT_ITER == "train_x": + self.num_batches = len(self.train_loader_x) + elif self.cfg.TRAIN.COUNT_ITER == "train_u": + self.num_batches = len(self.len_train_loader_u) + elif self.cfg.TRAIN.COUNT_ITER == "smaller_one": + self.num_batches = min( + len(self.train_loader_x), len(self.train_loader_u) + ) + self.max_iter = self.max_epoch * self.num_batches + print("Max Iterations: %d" % self.max_iter) + + print("Building F") + self.F = SimpleNet(cfg, cfg.MODEL, 0) + self.F.to(self.device) + print("# params: {:,}".format(count_num_param(self.F))) + self.optim_F = build_optimizer(self.F, cfg.OPTIM) + custom_lr_F = partial( + custom_scheduler, max_iter=self.max_iter, init_lr=cfg.OPTIM.LR + ) + self.sched_F = LambdaLR(self.optim_F, custom_lr_F) + self.register_model("F", self.F, self.optim_F, self.sched_F) + + print("Building C") + self.C = Prototypes(self.F.fdim, self.num_classes) + self.C.to(self.device) + print("# params: {:,}".format(count_num_param(self.C))) + self.optim_C = build_optimizer(self.C, cfg.OPTIM) + + # Multiply the learning rate of C by lr_multi + for group_param in self.optim_C.param_groups: + group_param['lr'] *= self.lr_multi + custom_lr_C = partial( + custom_scheduler, + max_iter=self.max_iter, + init_lr=cfg.OPTIM.LR * self.lr_multi + ) + self.sched_C = LambdaLR(self.optim_C, custom_lr_C) + self.register_model("C", self.C, self.optim_C, self.sched_C) + + def assess_y_pred_quality(self, y_pred, y_true, mask): + n_masked_correct = (y_pred.eq(y_true).float() * mask).sum() + acc_thre = n_masked_correct / (mask.sum() + 1e-5) + acc_raw = y_pred.eq(y_true).sum() / y_pred.numel() # raw accuracy + keep_rate = mask.sum() / mask.numel() + output = { + "acc_thre": acc_thre, 
+ "acc_raw": acc_raw, + "keep_rate": keep_rate + } + return output + + def forward_backward(self, batch_x, batch_u): + + current_itr = self.epoch * self.num_batches + self.batch_idx + + input_x, label_x, input_u, input_us, input_us2, label_u = self.parse_batch_train( + batch_x, batch_u + ) + + # Paper Reference Eq. 2 - Supervised Loss + + feat_x = self.F(input_x) + logit_x = self.C(feat_x) + loss_x = F.cross_entropy(logit_x, label_x) + + self.model_backward_and_update(loss_x) + + feat_u = self.F(input_u) + feat_us = self.F(input_us) + feat_us2 = self.F(input_us2) + + # Paper Reference Eq.3 - Adversarial Adaptive Loss + logit_u = self.C(feat_u, reverse=True) + logit_us = self.C(feat_us, reverse=True) + prob_u, prob_us = F.softmax(logit_u, dim=1), F.softmax(logit_us, dim=1) + + # Get similarity matrix s_ij + sim_mat = self.get_similarity_matrix(feat_u, self.topk, self.device) + + aac_loss = (-1. * self.aac_criterion(sim_mat, prob_u, prob_us)) + + # Paper Reference Eq. 4 - Pseudo label Loss + logit_u = self.C(feat_u) + logit_us = self.C(feat_us) + logit_us2 = self.C(feat_us2) + prob_u, prob_us, prob_us2 = F.softmax( + logit_u, dim=1 + ), F.softmax( + logit_us, dim=1 + ), F.softmax( + logit_us2, dim=1 + ) + prob_u = prob_u.detach() + max_probs, max_idx = torch.max(prob_u, dim=-1) + mask = max_probs.ge(self.p_thresh).float() + p_u_stats = self.assess_y_pred_quality(max_idx, label_u, mask) + + pl_loss = ( + F.cross_entropy(logit_us2, max_idx, reduction='none') * mask + ).mean() + + # Paper Reference Eq. 8 - Consistency Loss + cons_multi = self.sigmoid_rampup( + current_itr=current_itr, rampup_itr=self.rampup_iters + ) * self.rampup_coef + cons_loss = cons_multi * F.mse_loss(prob_us, prob_us2) + + loss_u = aac_loss + pl_loss + cons_loss + + self.model_backward_and_update(loss_u) + + loss_summary = { + "loss_x": loss_x.item(), + "acc_x": compute_accuracy(logit_x, label_x)[0].item(), + "loss_u": loss_u.item(), + "aac_loss": aac_loss.item(), + "pl_loss": pl_loss.item(), + "cons_loss": cons_loss.item(), + "p_u_pred_acc": p_u_stats["acc_raw"], + "p_u_pred_acc_thre": p_u_stats["acc_thre"], + "p_u_pred_keep": p_u_stats["keep_rate"] + } + + # Update LR after every iteration as mentioned in the paper + + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch_x, batch_u): + + input_x = batch_x["img"][0] + label_x = batch_x["label"] + + input_u = batch_u["img"][0] + input_us = batch_u["img2"][0] + input_us2 = batch_u["img2"][1] + label_u = batch_u["label"] + + input_x = input_x.to(self.device) + label_x = label_x.to(self.device) + + input_u = input_u.to(self.device) + input_us = input_us.to(self.device) + input_us2 = input_us2.to(self.device) + label_u = label_u.to(self.device) + + return input_x, label_x, input_u, input_us, input_us2, label_u + + def model_inference(self, input): + return self.C(self.F(input)) + + @staticmethod + def get_similarity_matrix(feat, topk, device): + + feat_d = feat.detach() + + feat_d = torch.sort( + torch.argsort(feat_d, dim=1, descending=True)[:, :topk], dim=1 + )[0] + sim_mat = torch.zeros((feat_d.shape[0], feat_d.shape[0])).to(device) + for row in range(feat_d.shape[0]): + sim_mat[row, torch.all(feat_d == feat_d[row, :], dim=1)] = 1 + return sim_mat + + @staticmethod + def sigmoid_rampup(current_itr, rampup_itr): + """Exponential Rampup + https://arxiv.org/abs/1610.02242 + """ + if rampup_itr == 0: + return 1.0 + else: + var = np.clip(current_itr, 0.0, rampup_itr) + phase = 1.0 - var/rampup_itr + return float(np.exp(-5.0 * phase * phase)) diff --git 
a/Dassl.pytorch/dassl/engine/da/dael.py b/Dassl.pytorch/dassl/engine/da/dael.py new file mode 100644 index 0000000000000000000000000000000000000000..458df7da350d8d4a4d876362bee88632ecab7d52 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/da/dael.py @@ -0,0 +1,210 @@ +import torch +import torch.nn as nn + +from dassl.data import DataManager +from dassl.optim import build_optimizer, build_lr_scheduler +from dassl.utils import count_num_param +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.metrics import compute_accuracy +from dassl.engine.trainer import SimpleNet +from dassl.data.transforms import build_transform +from dassl.modeling.ops.utils import create_onehot + + +class Experts(nn.Module): + + def __init__(self, n_source, fdim, num_classes): + super().__init__() + self.linears = nn.ModuleList( + [nn.Linear(fdim, num_classes) for _ in range(n_source)] + ) + self.softmax = nn.Softmax(dim=1) + + def forward(self, i, x): + x = self.linears[i](x) + x = self.softmax(x) + return x + + +@TRAINER_REGISTRY.register() +class DAEL(TrainerXU): + """Domain Adaptive Ensemble Learning. + + https://arxiv.org/abs/2003.07325. + """ + + def __init__(self, cfg): + super().__init__(cfg) + n_domain = cfg.DATALOADER.TRAIN_X.N_DOMAIN + batch_size = cfg.DATALOADER.TRAIN_X.BATCH_SIZE + if n_domain <= 0: + n_domain = self.num_source_domains + self.split_batch = batch_size // n_domain + self.n_domain = n_domain + + self.weight_u = cfg.TRAINER.DAEL.WEIGHT_U + self.conf_thre = cfg.TRAINER.DAEL.CONF_THRE + + def check_cfg(self, cfg): + assert cfg.DATALOADER.TRAIN_X.SAMPLER == "RandomDomainSampler" + assert not cfg.DATALOADER.TRAIN_U.SAME_AS_X + assert len(cfg.TRAINER.DAEL.STRONG_TRANSFORMS) > 0 + + def build_data_loader(self): + cfg = self.cfg + tfm_train = build_transform(cfg, is_train=True) + custom_tfm_train = [tfm_train] + choices = cfg.TRAINER.DAEL.STRONG_TRANSFORMS + tfm_train_strong = build_transform(cfg, is_train=True, choices=choices) + custom_tfm_train += [tfm_train_strong] + dm = DataManager(self.cfg, custom_tfm_train=custom_tfm_train) + self.train_loader_x = dm.train_loader_x + self.train_loader_u = dm.train_loader_u + self.val_loader = dm.val_loader + self.test_loader = dm.test_loader + self.num_classes = dm.num_classes + self.num_source_domains = dm.num_source_domains + self.lab2cname = dm.lab2cname + + def build_model(self): + cfg = self.cfg + + print("Building F") + self.F = SimpleNet(cfg, cfg.MODEL, 0) + self.F.to(self.device) + print("# params: {:,}".format(count_num_param(self.F))) + self.optim_F = build_optimizer(self.F, cfg.OPTIM) + self.sched_F = build_lr_scheduler(self.optim_F, cfg.OPTIM) + self.register_model("F", self.F, self.optim_F, self.sched_F) + fdim = self.F.fdim + + print("Building E") + self.E = Experts(self.num_source_domains, fdim, self.num_classes) + self.E.to(self.device) + print("# params: {:,}".format(count_num_param(self.E))) + self.optim_E = build_optimizer(self.E, cfg.OPTIM) + self.sched_E = build_lr_scheduler(self.optim_E, cfg.OPTIM) + self.register_model("E", self.E, self.optim_E, self.sched_E) + + def forward_backward(self, batch_x, batch_u): + parsed_data = self.parse_batch_train(batch_x, batch_u) + input_x, input_x2, label_x, domain_x, input_u, input_u2 = parsed_data + + input_x = torch.split(input_x, self.split_batch, 0) + input_x2 = torch.split(input_x2, self.split_batch, 0) + label_x = torch.split(label_x, self.split_batch, 0) + domain_x = torch.split(domain_x, self.split_batch, 0) + domain_x = [d[0].item() for d in domain_x] + + # Generate pseudo 
label + with torch.no_grad(): + feat_u = self.F(input_u) + pred_u = [] + for k in range(self.num_source_domains): + pred_uk = self.E(k, feat_u) + pred_uk = pred_uk.unsqueeze(1) + pred_u.append(pred_uk) + pred_u = torch.cat(pred_u, 1) # (B, K, C) + # Get the highest probability and index (label) for each expert + experts_max_p, experts_max_idx = pred_u.max(2) # (B, K) + # Get the most confident expert + max_expert_p, max_expert_idx = experts_max_p.max(1) # (B) + pseudo_label_u = [] + for i, experts_label in zip(max_expert_idx, experts_max_idx): + pseudo_label_u.append(experts_label[i]) + pseudo_label_u = torch.stack(pseudo_label_u, 0) + pseudo_label_u = create_onehot(pseudo_label_u, self.num_classes) + pseudo_label_u = pseudo_label_u.to(self.device) + label_u_mask = (max_expert_p >= self.conf_thre).float() + + loss_x = 0 + loss_cr = 0 + acc_x = 0 + + feat_x = [self.F(x) for x in input_x] + feat_x2 = [self.F(x) for x in input_x2] + feat_u2 = self.F(input_u2) + + for feat_xi, feat_x2i, label_xi, i in zip( + feat_x, feat_x2, label_x, domain_x + ): + cr_s = [j for j in domain_x if j != i] + + # Learning expert + pred_xi = self.E(i, feat_xi) + loss_x += (-label_xi * torch.log(pred_xi + 1e-5)).sum(1).mean() + expert_label_xi = pred_xi.detach() + acc_x += compute_accuracy(pred_xi.detach(), + label_xi.max(1)[1])[0].item() + + # Consistency regularization + cr_pred = [] + for j in cr_s: + pred_j = self.E(j, feat_x2i) + pred_j = pred_j.unsqueeze(1) + cr_pred.append(pred_j) + cr_pred = torch.cat(cr_pred, 1) + cr_pred = cr_pred.mean(1) + loss_cr += ((cr_pred - expert_label_xi)**2).sum(1).mean() + + loss_x /= self.n_domain + loss_cr /= self.n_domain + acc_x /= self.n_domain + + # Unsupervised loss + pred_u = [] + for k in range(self.num_source_domains): + pred_uk = self.E(k, feat_u2) + pred_uk = pred_uk.unsqueeze(1) + pred_u.append(pred_uk) + pred_u = torch.cat(pred_u, 1) + pred_u = pred_u.mean(1) + l_u = (-pseudo_label_u * torch.log(pred_u + 1e-5)).sum(1) + loss_u = (l_u * label_u_mask).mean() + + loss = 0 + loss += loss_x + loss += loss_cr + loss += loss_u * self.weight_u + self.model_backward_and_update(loss) + + loss_summary = { + "loss_x": loss_x.item(), + "acc_x": acc_x, + "loss_cr": loss_cr.item(), + "loss_u": loss_u.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch_x, batch_u): + input_x = batch_x["img"] + input_x2 = batch_x["img2"] + label_x = batch_x["label"] + domain_x = batch_x["domain"] + input_u = batch_u["img"] + input_u2 = batch_u["img2"] + + label_x = create_onehot(label_x, self.num_classes) + + input_x = input_x.to(self.device) + input_x2 = input_x2.to(self.device) + label_x = label_x.to(self.device) + input_u = input_u.to(self.device) + input_u2 = input_u2.to(self.device) + + return input_x, input_x2, label_x, domain_x, input_u, input_u2 + + def model_inference(self, input): + f = self.F(input) + p = [] + for k in range(self.num_source_domains): + p_k = self.E(k, f) + p_k = p_k.unsqueeze(1) + p.append(p_k) + p = torch.cat(p, 1) + p = p.mean(1) + return p diff --git a/Dassl.pytorch/dassl/engine/da/dann.py b/Dassl.pytorch/dassl/engine/da/dann.py new file mode 100644 index 0000000000000000000000000000000000000000..64bb3f7de48c66ddd83e9d2fbec41d74ea351d83 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/da/dann.py @@ -0,0 +1,78 @@ +import numpy as np +import torch +import torch.nn as nn + +from dassl.optim import build_optimizer, build_lr_scheduler +from dassl.utils import count_num_param +from 
dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.metrics import compute_accuracy +from dassl.modeling import build_head +from dassl.modeling.ops import ReverseGrad + + +@TRAINER_REGISTRY.register() +class DANN(TrainerXU): + """Domain-Adversarial Neural Networks. + + https://arxiv.org/abs/1505.07818. + """ + + def __init__(self, cfg): + super().__init__(cfg) + self.build_critic() + self.ce = nn.CrossEntropyLoss() + self.bce = nn.BCEWithLogitsLoss() + + def build_critic(self): + cfg = self.cfg + + print("Building critic network") + fdim = self.model.fdim + critic_body = build_head( + "mlp", + verbose=cfg.VERBOSE, + in_features=fdim, + hidden_layers=[fdim, fdim], + activation="leaky_relu", + ) + self.critic = nn.Sequential(critic_body, nn.Linear(fdim, 1)) + print("# params: {:,}".format(count_num_param(self.critic))) + self.critic.to(self.device) + self.optim_c = build_optimizer(self.critic, cfg.OPTIM) + self.sched_c = build_lr_scheduler(self.optim_c, cfg.OPTIM) + self.register_model("critic", self.critic, self.optim_c, self.sched_c) + self.revgrad = ReverseGrad() + + def forward_backward(self, batch_x, batch_u): + input_x, label_x, input_u = self.parse_batch_train(batch_x, batch_u) + domain_x = torch.ones(input_x.shape[0], 1).to(self.device) + domain_u = torch.zeros(input_u.shape[0], 1).to(self.device) + + global_step = self.batch_idx + self.epoch * self.num_batches + progress = global_step / (self.max_epoch * self.num_batches) + lmda = 2 / (1 + np.exp(-10 * progress)) - 1 + + logit_x, feat_x = self.model(input_x, return_feature=True) + _, feat_u = self.model(input_u, return_feature=True) + + loss_x = self.ce(logit_x, label_x) + + feat_x = self.revgrad(feat_x, grad_scaling=lmda) + feat_u = self.revgrad(feat_u, grad_scaling=lmda) + output_xd = self.critic(feat_x) + output_ud = self.critic(feat_u) + loss_d = self.bce(output_xd, domain_x) + self.bce(output_ud, domain_u) + + loss = loss_x + loss_d + self.model_backward_and_update(loss) + + loss_summary = { + "loss_x": loss_x.item(), + "acc_x": compute_accuracy(logit_x, label_x)[0].item(), + "loss_d": loss_d.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary diff --git a/Dassl.pytorch/dassl/engine/da/m3sda.py b/Dassl.pytorch/dassl/engine/da/m3sda.py new file mode 100644 index 0000000000000000000000000000000000000000..59b5673feedad04e1ba8ad61e19f4513d0660300 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/da/m3sda.py @@ -0,0 +1,208 @@ +import torch +import torch.nn as nn +from torch.nn import functional as F + +from dassl.optim import build_optimizer, build_lr_scheduler +from dassl.utils import count_num_param +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.engine.trainer import SimpleNet + + +class PairClassifiers(nn.Module): + + def __init__(self, fdim, num_classes): + super().__init__() + self.c1 = nn.Linear(fdim, num_classes) + self.c2 = nn.Linear(fdim, num_classes) + + def forward(self, x): + z1 = self.c1(x) + if not self.training: + return z1 + z2 = self.c2(x) + return z1, z2 + + +@TRAINER_REGISTRY.register() +class M3SDA(TrainerXU): + """Moment Matching for Multi-Source Domain Adaptation. + + https://arxiv.org/abs/1812.01754. 
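+
+ Note on this implementation: moment matching is done in moment_distance(),
+ which compares the per-dimension feature means and variances of every
+ source domain with those of the target batch (and of the source domains
+ with each other), in addition to training the per-domain classifier pairs.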
+ """ + + def __init__(self, cfg): + super().__init__(cfg) + n_domain = cfg.DATALOADER.TRAIN_X.N_DOMAIN + batch_size = cfg.DATALOADER.TRAIN_X.BATCH_SIZE + if n_domain <= 0: + n_domain = self.num_source_domains + self.split_batch = batch_size // n_domain + self.n_domain = n_domain + + self.n_step_F = cfg.TRAINER.M3SDA.N_STEP_F + self.lmda = cfg.TRAINER.M3SDA.LMDA + + def check_cfg(self, cfg): + assert cfg.DATALOADER.TRAIN_X.SAMPLER == "RandomDomainSampler" + assert not cfg.DATALOADER.TRAIN_U.SAME_AS_X + + def build_model(self): + cfg = self.cfg + + print("Building F") + self.F = SimpleNet(cfg, cfg.MODEL, 0) + self.F.to(self.device) + print("# params: {:,}".format(count_num_param(self.F))) + self.optim_F = build_optimizer(self.F, cfg.OPTIM) + self.sched_F = build_lr_scheduler(self.optim_F, cfg.OPTIM) + self.register_model("F", self.F, self.optim_F, self.sched_F) + fdim = self.F.fdim + + print("Building C") + self.C = nn.ModuleList( + [ + PairClassifiers(fdim, self.num_classes) + for _ in range(self.num_source_domains) + ] + ) + self.C.to(self.device) + print("# params: {:,}".format(count_num_param(self.C))) + self.optim_C = build_optimizer(self.C, cfg.OPTIM) + self.sched_C = build_lr_scheduler(self.optim_C, cfg.OPTIM) + self.register_model("C", self.C, self.optim_C, self.sched_C) + + def forward_backward(self, batch_x, batch_u): + parsed = self.parse_batch_train(batch_x, batch_u) + input_x, label_x, domain_x, input_u = parsed + + input_x = torch.split(input_x, self.split_batch, 0) + label_x = torch.split(label_x, self.split_batch, 0) + domain_x = torch.split(domain_x, self.split_batch, 0) + domain_x = [d[0].item() for d in domain_x] + + # Step A + loss_x = 0 + feat_x = [] + + for x, y, d in zip(input_x, label_x, domain_x): + f = self.F(x) + z1, z2 = self.C[d](f) + loss_x += F.cross_entropy(z1, y) + F.cross_entropy(z2, y) + + feat_x.append(f) + + loss_x /= self.n_domain + + feat_u = self.F(input_u) + loss_msda = self.moment_distance(feat_x, feat_u) + + loss_step_A = loss_x + loss_msda * self.lmda + self.model_backward_and_update(loss_step_A) + + # Step B + with torch.no_grad(): + feat_u = self.F(input_u) + + loss_x, loss_dis = 0, 0 + + for x, y, d in zip(input_x, label_x, domain_x): + with torch.no_grad(): + f = self.F(x) + z1, z2 = self.C[d](f) + loss_x += F.cross_entropy(z1, y) + F.cross_entropy(z2, y) + + z1, z2 = self.C[d](feat_u) + p1 = F.softmax(z1, 1) + p2 = F.softmax(z2, 1) + loss_dis += self.discrepancy(p1, p2) + + loss_x /= self.n_domain + loss_dis /= self.n_domain + + loss_step_B = loss_x - loss_dis + self.model_backward_and_update(loss_step_B, "C") + + # Step C + for _ in range(self.n_step_F): + feat_u = self.F(input_u) + + loss_dis = 0 + + for d in domain_x: + z1, z2 = self.C[d](feat_u) + p1 = F.softmax(z1, 1) + p2 = F.softmax(z2, 1) + loss_dis += self.discrepancy(p1, p2) + + loss_dis /= self.n_domain + loss_step_C = loss_dis + + self.model_backward_and_update(loss_step_C, "F") + + loss_summary = { + "loss_step_A": loss_step_A.item(), + "loss_step_B": loss_step_B.item(), + "loss_step_C": loss_step_C.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def moment_distance(self, x, u): + # x (list): a list of feature matrix. + # u (torch.Tensor): feature matrix. 
+ x_mean = [xi.mean(0) for xi in x] + u_mean = u.mean(0) + dist1 = self.pairwise_distance(x_mean, u_mean) + + x_var = [xi.var(0) for xi in x] + u_var = u.var(0) + dist2 = self.pairwise_distance(x_var, u_var) + + return (dist1+dist2) / 2 + + def pairwise_distance(self, x, u): + # x (list): a list of feature vector. + # u (torch.Tensor): feature vector. + dist = 0 + count = 0 + + for xi in x: + dist += self.euclidean(xi, u) + count += 1 + + for i in range(len(x) - 1): + for j in range(i + 1, len(x)): + dist += self.euclidean(x[i], x[j]) + count += 1 + + return dist / count + + def euclidean(self, input1, input2): + return ((input1 - input2)**2).sum().sqrt() + + def discrepancy(self, y1, y2): + return (y1 - y2).abs().mean() + + def parse_batch_train(self, batch_x, batch_u): + input_x = batch_x["img"] + label_x = batch_x["label"] + domain_x = batch_x["domain"] + input_u = batch_u["img"] + + input_x = input_x.to(self.device) + label_x = label_x.to(self.device) + input_u = input_u.to(self.device) + + return input_x, label_x, domain_x, input_u + + def model_inference(self, input): + f = self.F(input) + p = 0 + for C_i in self.C: + z = C_i(f) + p += F.softmax(z, 1) + p = p / len(self.C) + return p diff --git a/Dassl.pytorch/dassl/engine/da/mcd.py b/Dassl.pytorch/dassl/engine/da/mcd.py new file mode 100644 index 0000000000000000000000000000000000000000..174a2e05c66bdb0b5ff27e386063ba4f30aa398e --- /dev/null +++ b/Dassl.pytorch/dassl/engine/da/mcd.py @@ -0,0 +1,105 @@ +import torch +import torch.nn as nn +from torch.nn import functional as F + +from dassl.optim import build_optimizer, build_lr_scheduler +from dassl.utils import count_num_param +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.engine.trainer import SimpleNet + + +@TRAINER_REGISTRY.register() +class MCD(TrainerXU): + """Maximum Classifier Discrepancy. + + https://arxiv.org/abs/1712.02560. 
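+
+ Training alternates three steps (see forward_backward below): (A) train F
+ and both classifiers on labelled source data; (B) with F fixed, maximise
+ the C1/C2 prediction discrepancy on target data while staying accurate on
+ source; (C) with the classifiers fixed, update F for N_STEP_F steps to
+ minimise that discrepancy.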
+ """ + + def __init__(self, cfg): + super().__init__(cfg) + self.n_step_F = cfg.TRAINER.MCD.N_STEP_F + + def build_model(self): + cfg = self.cfg + + print("Building F") + self.F = SimpleNet(cfg, cfg.MODEL, 0) + self.F.to(self.device) + print("# params: {:,}".format(count_num_param(self.F))) + self.optim_F = build_optimizer(self.F, cfg.OPTIM) + self.sched_F = build_lr_scheduler(self.optim_F, cfg.OPTIM) + self.register_model("F", self.F, self.optim_F, self.sched_F) + fdim = self.F.fdim + + print("Building C1") + self.C1 = nn.Linear(fdim, self.num_classes) + self.C1.to(self.device) + print("# params: {:,}".format(count_num_param(self.C1))) + self.optim_C1 = build_optimizer(self.C1, cfg.OPTIM) + self.sched_C1 = build_lr_scheduler(self.optim_C1, cfg.OPTIM) + self.register_model("C1", self.C1, self.optim_C1, self.sched_C1) + + print("Building C2") + self.C2 = nn.Linear(fdim, self.num_classes) + self.C2.to(self.device) + print("# params: {:,}".format(count_num_param(self.C2))) + self.optim_C2 = build_optimizer(self.C2, cfg.OPTIM) + self.sched_C2 = build_lr_scheduler(self.optim_C2, cfg.OPTIM) + self.register_model("C2", self.C2, self.optim_C2, self.sched_C2) + + def forward_backward(self, batch_x, batch_u): + parsed = self.parse_batch_train(batch_x, batch_u) + input_x, label_x, input_u = parsed + + # Step A + feat_x = self.F(input_x) + logit_x1 = self.C1(feat_x) + logit_x2 = self.C2(feat_x) + loss_x1 = F.cross_entropy(logit_x1, label_x) + loss_x2 = F.cross_entropy(logit_x2, label_x) + loss_step_A = loss_x1 + loss_x2 + self.model_backward_and_update(loss_step_A) + + # Step B + with torch.no_grad(): + feat_x = self.F(input_x) + logit_x1 = self.C1(feat_x) + logit_x2 = self.C2(feat_x) + loss_x1 = F.cross_entropy(logit_x1, label_x) + loss_x2 = F.cross_entropy(logit_x2, label_x) + loss_x = loss_x1 + loss_x2 + + with torch.no_grad(): + feat_u = self.F(input_u) + pred_u1 = F.softmax(self.C1(feat_u), 1) + pred_u2 = F.softmax(self.C2(feat_u), 1) + loss_dis = self.discrepancy(pred_u1, pred_u2) + + loss_step_B = loss_x - loss_dis + self.model_backward_and_update(loss_step_B, ["C1", "C2"]) + + # Step C + for _ in range(self.n_step_F): + feat_u = self.F(input_u) + pred_u1 = F.softmax(self.C1(feat_u), 1) + pred_u2 = F.softmax(self.C2(feat_u), 1) + loss_step_C = self.discrepancy(pred_u1, pred_u2) + self.model_backward_and_update(loss_step_C, "F") + + loss_summary = { + "loss_step_A": loss_step_A.item(), + "loss_step_B": loss_step_B.item(), + "loss_step_C": loss_step_C.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def discrepancy(self, y1, y2): + return (y1 - y2).abs().mean() + + def model_inference(self, input): + feat = self.F(input) + return self.C1(feat) diff --git a/Dassl.pytorch/dassl/engine/da/mme.py b/Dassl.pytorch/dassl/engine/da/mme.py new file mode 100644 index 0000000000000000000000000000000000000000..fd7775c632db2f1fdd576e0cae06e586c71a05db --- /dev/null +++ b/Dassl.pytorch/dassl/engine/da/mme.py @@ -0,0 +1,86 @@ +import torch +import torch.nn as nn +from torch.nn import functional as F + +from dassl.optim import build_optimizer, build_lr_scheduler +from dassl.utils import count_num_param +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.metrics import compute_accuracy +from dassl.modeling.ops import ReverseGrad +from dassl.engine.trainer import SimpleNet + + +class Prototypes(nn.Module): + + def __init__(self, fdim, num_classes, temp=0.05): + super().__init__() + self.prototypes = nn.Linear(fdim, num_classes, bias=False) + 
self.temp = temp + + def forward(self, x): + x = F.normalize(x, p=2, dim=1) + out = self.prototypes(x) + out = out / self.temp + return out + + +@TRAINER_REGISTRY.register() +class MME(TrainerXU): + """Minimax Entropy. + + https://arxiv.org/abs/1904.06487. + """ + + def __init__(self, cfg): + super().__init__(cfg) + self.lmda = cfg.TRAINER.MME.LMDA + + def build_model(self): + cfg = self.cfg + + print("Building F") + self.F = SimpleNet(cfg, cfg.MODEL, 0) + self.F.to(self.device) + print("# params: {:,}".format(count_num_param(self.F))) + self.optim_F = build_optimizer(self.F, cfg.OPTIM) + self.sched_F = build_lr_scheduler(self.optim_F, cfg.OPTIM) + self.register_model("F", self.F, self.optim_F, self.sched_F) + + print("Building C") + self.C = Prototypes(self.F.fdim, self.num_classes) + self.C.to(self.device) + print("# params: {:,}".format(count_num_param(self.C))) + self.optim_C = build_optimizer(self.C, cfg.OPTIM) + self.sched_C = build_lr_scheduler(self.optim_C, cfg.OPTIM) + self.register_model("C", self.C, self.optim_C, self.sched_C) + + self.revgrad = ReverseGrad() + + def forward_backward(self, batch_x, batch_u): + input_x, label_x, input_u = self.parse_batch_train(batch_x, batch_u) + + feat_x = self.F(input_x) + logit_x = self.C(feat_x) + loss_x = F.cross_entropy(logit_x, label_x) + self.model_backward_and_update(loss_x) + + feat_u = self.F(input_u) + feat_u = self.revgrad(feat_u) + logit_u = self.C(feat_u) + prob_u = F.softmax(logit_u, 1) + loss_u = -(-prob_u * torch.log(prob_u + 1e-5)).sum(1).mean() + self.model_backward_and_update(loss_u * self.lmda) + + loss_summary = { + "loss_x": loss_x.item(), + "acc_x": compute_accuracy(logit_x, label_x)[0].item(), + "loss_u": loss_u.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def model_inference(self, input): + return self.C(self.F(input)) diff --git a/Dassl.pytorch/dassl/engine/da/se.py b/Dassl.pytorch/dassl/engine/da/se.py new file mode 100644 index 0000000000000000000000000000000000000000..b0f498a3480c6efc64cb885822e4ca8ef68c18b3 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/da/se.py @@ -0,0 +1,78 @@ +import copy +from torch.nn import functional as F + +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.metrics import compute_accuracy +from dassl.modeling.ops.utils import sigmoid_rampup, ema_model_update + + +@TRAINER_REGISTRY.register() +class SE(TrainerXU): + """Self-ensembling for visual domain adaptation. + + https://arxiv.org/abs/1706.05208. 
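+
+ The teacher is an exponential moving average of the student. Roughly, with
+ alpha the EMA decay (illustrative sketch only; the actual update is done
+ by ema_model_update):
+
+ for t, s in zip(teacher.parameters(), student.parameters()):
+ t.data.mul_(alpha).add_(s.data, alpha=1 - alpha)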
+ """ + + def __init__(self, cfg): + super().__init__(cfg) + self.ema_alpha = cfg.TRAINER.SE.EMA_ALPHA + self.conf_thre = cfg.TRAINER.SE.CONF_THRE + self.rampup = cfg.TRAINER.SE.RAMPUP + + self.teacher = copy.deepcopy(self.model) + self.teacher.train() + for param in self.teacher.parameters(): + param.requires_grad_(False) + + def check_cfg(self, cfg): + assert cfg.DATALOADER.K_TRANSFORMS == 2 + + def forward_backward(self, batch_x, batch_u): + global_step = self.batch_idx + self.epoch * self.num_batches + parsed = self.parse_batch_train(batch_x, batch_u) + input_x, label_x, input_u1, input_u2 = parsed + + logit_x = self.model(input_x) + loss_x = F.cross_entropy(logit_x, label_x) + + prob_u = F.softmax(self.model(input_u1), 1) + t_prob_u = F.softmax(self.teacher(input_u2), 1) + loss_u = ((prob_u - t_prob_u)**2).sum(1) + + if self.conf_thre: + max_prob = t_prob_u.max(1)[0] + mask = (max_prob > self.conf_thre).float() + loss_u = (loss_u * mask).mean() + else: + weight_u = sigmoid_rampup(global_step, self.rampup) + loss_u = loss_u.mean() * weight_u + + loss = loss_x + loss_u + self.model_backward_and_update(loss) + + ema_alpha = min(1 - 1 / (global_step+1), self.ema_alpha) + ema_model_update(self.model, self.teacher, ema_alpha) + + loss_summary = { + "loss_x": loss_x.item(), + "acc_x": compute_accuracy(logit_x, label_x)[0].item(), + "loss_u": loss_u.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch_x, batch_u): + input_x = batch_x["img"][0] + label_x = batch_x["label"] + input_u = batch_u["img"] + input_u1, input_u2 = input_u + + input_x = input_x.to(self.device) + label_x = label_x.to(self.device) + input_u1 = input_u1.to(self.device) + input_u2 = input_u2.to(self.device) + + return input_x, label_x, input_u1, input_u2 diff --git a/Dassl.pytorch/dassl/engine/da/source_only.py b/Dassl.pytorch/dassl/engine/da/source_only.py new file mode 100644 index 0000000000000000000000000000000000000000..2e7d9a683c9af49f2e123621fe3795282b82acc2 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/da/source_only.py @@ -0,0 +1,34 @@ +from torch.nn import functional as F + +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.metrics import compute_accuracy + + +@TRAINER_REGISTRY.register() +class SourceOnly(TrainerXU): + """Baseline model for domain adaptation, which is + trained using source data only. 
+ """ + + def forward_backward(self, batch_x, batch_u): + input, label = self.parse_batch_train(batch_x, batch_u) + output = self.model(input) + loss = F.cross_entropy(output, label) + self.model_backward_and_update(loss) + + loss_summary = { + "loss": loss.item(), + "acc": compute_accuracy(output, label)[0].item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch_x, batch_u): + input = batch_x["img"] + label = batch_x["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label diff --git a/Dassl.pytorch/dassl/engine/dg/__init__.py b/Dassl.pytorch/dassl/engine/dg/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..23146a4a3318f64f1b5fe589ece626331ec7f52e --- /dev/null +++ b/Dassl.pytorch/dassl/engine/dg/__init__.py @@ -0,0 +1,5 @@ +from .ddaig import DDAIG +from .daeldg import DAELDG +from .vanilla import Vanilla +from .crossgrad import CrossGrad +from .domain_mix import DomainMix diff --git a/Dassl.pytorch/dassl/engine/dg/crossgrad.py b/Dassl.pytorch/dassl/engine/dg/crossgrad.py new file mode 100644 index 0000000000000000000000000000000000000000..ad9a6bd5a3b4a66101ca5d8de9891307db9a4462 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/dg/crossgrad.py @@ -0,0 +1,83 @@ +import torch +from torch.nn import functional as F + +from dassl.optim import build_optimizer, build_lr_scheduler +from dassl.utils import count_num_param +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.engine.trainer import SimpleNet + + +@TRAINER_REGISTRY.register() +class CrossGrad(TrainerX): + """Cross-gradient training. + + https://arxiv.org/abs/1804.10745. + """ + + def __init__(self, cfg): + super().__init__(cfg) + self.eps_f = cfg.TRAINER.CROSSGRAD.EPS_F + self.eps_d = cfg.TRAINER.CROSSGRAD.EPS_D + self.alpha_f = cfg.TRAINER.CROSSGRAD.ALPHA_F + self.alpha_d = cfg.TRAINER.CROSSGRAD.ALPHA_D + + def build_model(self): + cfg = self.cfg + + print("Building F") + self.F = SimpleNet(cfg, cfg.MODEL, self.num_classes) + self.F.to(self.device) + print("# params: {:,}".format(count_num_param(self.F))) + self.optim_F = build_optimizer(self.F, cfg.OPTIM) + self.sched_F = build_lr_scheduler(self.optim_F, cfg.OPTIM) + self.register_model("F", self.F, self.optim_F, self.sched_F) + + print("Building D") + self.D = SimpleNet(cfg, cfg.MODEL, self.num_source_domains) + self.D.to(self.device) + print("# params: {:,}".format(count_num_param(self.D))) + self.optim_D = build_optimizer(self.D, cfg.OPTIM) + self.sched_D = build_lr_scheduler(self.optim_D, cfg.OPTIM) + self.register_model("D", self.D, self.optim_D, self.sched_D) + + def forward_backward(self, batch): + input, label, domain = self.parse_batch_train(batch) + + input.requires_grad = True + + # Compute domain perturbation + loss_d = F.cross_entropy(self.D(input), domain) + loss_d.backward() + grad_d = torch.clamp(input.grad.data, min=-0.1, max=0.1) + input_d = input.data + self.eps_f * grad_d + + # Compute label perturbation + input.grad.data.zero_() + loss_f = F.cross_entropy(self.F(input), label) + loss_f.backward() + grad_f = torch.clamp(input.grad.data, min=-0.1, max=0.1) + input_f = input.data + self.eps_d * grad_f + + input = input.detach() + + # Update label net + loss_f1 = F.cross_entropy(self.F(input), label) + loss_f2 = F.cross_entropy(self.F(input_d), label) + loss_f = (1 - self.alpha_f) * loss_f1 + self.alpha_f * loss_f2 + self.model_backward_and_update(loss_f, "F") + + # Update domain net + loss_d1 = 
F.cross_entropy(self.D(input), domain) + loss_d2 = F.cross_entropy(self.D(input_f), domain) + loss_d = (1 - self.alpha_d) * loss_d1 + self.alpha_d * loss_d2 + self.model_backward_and_update(loss_d, "D") + + loss_summary = {"loss_f": loss_f.item(), "loss_d": loss_d.item()} + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def model_inference(self, input): + return self.F(input) diff --git a/Dassl.pytorch/dassl/engine/dg/daeldg.py b/Dassl.pytorch/dassl/engine/dg/daeldg.py new file mode 100644 index 0000000000000000000000000000000000000000..8d6d11c46db3464659fa3fc6605d3eb197056964 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/dg/daeldg.py @@ -0,0 +1,168 @@ +import torch +import torch.nn as nn + +from dassl.data import DataManager +from dassl.optim import build_optimizer, build_lr_scheduler +from dassl.utils import count_num_param +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy +from dassl.engine.trainer import SimpleNet +from dassl.data.transforms import build_transform +from dassl.modeling.ops.utils import create_onehot + + +class Experts(nn.Module): + + def __init__(self, n_source, fdim, num_classes): + super().__init__() + self.linears = nn.ModuleList( + [nn.Linear(fdim, num_classes) for _ in range(n_source)] + ) + self.softmax = nn.Softmax(dim=1) + + def forward(self, i, x): + x = self.linears[i](x) + x = self.softmax(x) + return x + + +@TRAINER_REGISTRY.register() +class DAELDG(TrainerX): + """Domain Adaptive Ensemble Learning. + + DG version: only use labeled source data. + + https://arxiv.org/abs/2003.07325. + """ + + def __init__(self, cfg): + super().__init__(cfg) + n_domain = cfg.DATALOADER.TRAIN_X.N_DOMAIN + batch_size = cfg.DATALOADER.TRAIN_X.BATCH_SIZE + if n_domain <= 0: + n_domain = self.num_source_domains + self.split_batch = batch_size // n_domain + self.n_domain = n_domain + self.conf_thre = cfg.TRAINER.DAELDG.CONF_THRE + + def check_cfg(self, cfg): + assert cfg.DATALOADER.TRAIN_X.SAMPLER == "RandomDomainSampler" + assert len(cfg.TRAINER.DAELDG.STRONG_TRANSFORMS) > 0 + + def build_data_loader(self): + cfg = self.cfg + tfm_train = build_transform(cfg, is_train=True) + custom_tfm_train = [tfm_train] + choices = cfg.TRAINER.DAELDG.STRONG_TRANSFORMS + tfm_train_strong = build_transform(cfg, is_train=True, choices=choices) + custom_tfm_train += [tfm_train_strong] + dm = DataManager(self.cfg, custom_tfm_train=custom_tfm_train) + self.train_loader_x = dm.train_loader_x + self.train_loader_u = dm.train_loader_u + self.val_loader = dm.val_loader + self.test_loader = dm.test_loader + self.num_classes = dm.num_classes + self.num_source_domains = dm.num_source_domains + self.lab2cname = dm.lab2cname + + def build_model(self): + cfg = self.cfg + + print("Building F") + self.F = SimpleNet(cfg, cfg.MODEL, 0) + self.F.to(self.device) + print("# params: {:,}".format(count_num_param(self.F))) + self.optim_F = build_optimizer(self.F, cfg.OPTIM) + self.sched_F = build_lr_scheduler(self.optim_F, cfg.OPTIM) + self.register_model("F", self.F, self.optim_F, self.sched_F) + fdim = self.F.fdim + + print("Building E") + self.E = Experts(self.num_source_domains, fdim, self.num_classes) + self.E.to(self.device) + print("# params: {:,}".format(count_num_param(self.E))) + self.optim_E = build_optimizer(self.E, cfg.OPTIM) + self.sched_E = build_lr_scheduler(self.optim_E, cfg.OPTIM) + self.register_model("E", self.E, self.optim_E, self.sched_E) + + def forward_backward(self, batch): + parsed_data = 
self.parse_batch_train(batch) + input, input2, label, domain = parsed_data + + input = torch.split(input, self.split_batch, 0) + input2 = torch.split(input2, self.split_batch, 0) + label = torch.split(label, self.split_batch, 0) + domain = torch.split(domain, self.split_batch, 0) + domain = [d[0].item() for d in domain] + + loss_x = 0 + loss_cr = 0 + acc = 0 + + feat = [self.F(x) for x in input] + feat2 = [self.F(x) for x in input2] + + for feat_i, feat2_i, label_i, i in zip(feat, feat2, label, domain): + cr_s = [j for j in domain if j != i] + + # Learning expert + pred_i = self.E(i, feat_i) + loss_x += (-label_i * torch.log(pred_i + 1e-5)).sum(1).mean() + expert_label_i = pred_i.detach() + acc += compute_accuracy(pred_i.detach(), + label_i.max(1)[1])[0].item() + + # Consistency regularization + cr_pred = [] + for j in cr_s: + pred_j = self.E(j, feat2_i) + pred_j = pred_j.unsqueeze(1) + cr_pred.append(pred_j) + cr_pred = torch.cat(cr_pred, 1) + cr_pred = cr_pred.mean(1) + loss_cr += ((cr_pred - expert_label_i)**2).sum(1).mean() + + loss_x /= self.n_domain + loss_cr /= self.n_domain + acc /= self.n_domain + + loss = 0 + loss += loss_x + loss += loss_cr + self.model_backward_and_update(loss) + + loss_summary = { + "loss_x": loss_x.item(), + "acc": acc, + "loss_cr": loss_cr.item() + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch): + input = batch["img"] + input2 = batch["img2"] + label = batch["label"] + domain = batch["domain"] + + label = create_onehot(label, self.num_classes) + + input = input.to(self.device) + input2 = input2.to(self.device) + label = label.to(self.device) + + return input, input2, label, domain + + def model_inference(self, input): + f = self.F(input) + p = [] + for k in range(self.num_source_domains): + p_k = self.E(k, f) + p_k = p_k.unsqueeze(1) + p.append(p_k) + p = torch.cat(p, 1) + p = p.mean(1) + return p diff --git a/Dassl.pytorch/dassl/engine/dg/ddaig.py b/Dassl.pytorch/dassl/engine/dg/ddaig.py new file mode 100644 index 0000000000000000000000000000000000000000..b7fbd97348bc346731631c7f456ec02d8cd1e222 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/dg/ddaig.py @@ -0,0 +1,107 @@ +import torch +from torch.nn import functional as F + +from dassl.optim import build_optimizer, build_lr_scheduler +from dassl.utils import count_num_param +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.modeling import build_network +from dassl.engine.trainer import SimpleNet + + +@TRAINER_REGISTRY.register() +class DDAIG(TrainerX): + """Deep Domain-Adversarial Image Generation. + + https://arxiv.org/abs/2003.06054. 
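+
+ In this trainer a generator G produces a perturbed copy of the input that
+ keeps the label network F correct while fooling the domain network D; F is
+ trained on the original images and, after WARMUP epochs, also on the
+ perturbed ones, while D is trained on the original images only.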
+ """ + + def __init__(self, cfg): + super().__init__(cfg) + self.lmda = cfg.TRAINER.DDAIG.LMDA + self.clamp = cfg.TRAINER.DDAIG.CLAMP + self.clamp_min = cfg.TRAINER.DDAIG.CLAMP_MIN + self.clamp_max = cfg.TRAINER.DDAIG.CLAMP_MAX + self.warmup = cfg.TRAINER.DDAIG.WARMUP + self.alpha = cfg.TRAINER.DDAIG.ALPHA + + def build_model(self): + cfg = self.cfg + + print("Building F") + self.F = SimpleNet(cfg, cfg.MODEL, self.num_classes) + self.F.to(self.device) + print("# params: {:,}".format(count_num_param(self.F))) + self.optim_F = build_optimizer(self.F, cfg.OPTIM) + self.sched_F = build_lr_scheduler(self.optim_F, cfg.OPTIM) + self.register_model("F", self.F, self.optim_F, self.sched_F) + + print("Building D") + self.D = SimpleNet(cfg, cfg.MODEL, self.num_source_domains) + self.D.to(self.device) + print("# params: {:,}".format(count_num_param(self.D))) + self.optim_D = build_optimizer(self.D, cfg.OPTIM) + self.sched_D = build_lr_scheduler(self.optim_D, cfg.OPTIM) + self.register_model("D", self.D, self.optim_D, self.sched_D) + + print("Building G") + self.G = build_network(cfg.TRAINER.DDAIG.G_ARCH, verbose=cfg.VERBOSE) + self.G.to(self.device) + print("# params: {:,}".format(count_num_param(self.G))) + self.optim_G = build_optimizer(self.G, cfg.OPTIM) + self.sched_G = build_lr_scheduler(self.optim_G, cfg.OPTIM) + self.register_model("G", self.G, self.optim_G, self.sched_G) + + def forward_backward(self, batch): + input, label, domain = self.parse_batch_train(batch) + + ############# + # Update G + ############# + input_p = self.G(input, lmda=self.lmda) + if self.clamp: + input_p = torch.clamp( + input_p, min=self.clamp_min, max=self.clamp_max + ) + loss_g = 0 + # Minimize label loss + loss_g += F.cross_entropy(self.F(input_p), label) + # Maximize domain loss + loss_g -= F.cross_entropy(self.D(input_p), domain) + self.model_backward_and_update(loss_g, "G") + + # Perturb data with new G + with torch.no_grad(): + input_p = self.G(input, lmda=self.lmda) + if self.clamp: + input_p = torch.clamp( + input_p, min=self.clamp_min, max=self.clamp_max + ) + + ############# + # Update F + ############# + loss_f = F.cross_entropy(self.F(input), label) + if (self.epoch + 1) > self.warmup: + loss_fp = F.cross_entropy(self.F(input_p), label) + loss_f = (1.0 - self.alpha) * loss_f + self.alpha * loss_fp + self.model_backward_and_update(loss_f, "F") + + ############# + # Update D + ############# + loss_d = F.cross_entropy(self.D(input), domain) + self.model_backward_and_update(loss_d, "D") + + loss_summary = { + "loss_g": loss_g.item(), + "loss_f": loss_f.item(), + "loss_d": loss_d.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def model_inference(self, input): + return self.F(input) diff --git a/Dassl.pytorch/dassl/engine/dg/domain_mix.py b/Dassl.pytorch/dassl/engine/dg/domain_mix.py new file mode 100644 index 0000000000000000000000000000000000000000..654f2706f3c50e2ace45383c228e7784084d6b11 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/dg/domain_mix.py @@ -0,0 +1,81 @@ +import torch +from torch.nn import functional as F + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy + +__all__ = ["DomainMix"] + + +@TRAINER_REGISTRY.register() +class DomainMix(TrainerX): + """DomainMix. + + Dynamic Domain Generalization. 
+ + https://github.com/MetaVisionLab/DDG + """ + + def __init__(self, cfg): + super(DomainMix, self).__init__(cfg) + self.mix_type = cfg.TRAINER.DOMAINMIX.TYPE + self.alpha = cfg.TRAINER.DOMAINMIX.ALPHA + self.beta = cfg.TRAINER.DOMAINMIX.BETA + self.dist_beta = torch.distributions.Beta(self.alpha, self.beta) + + def forward_backward(self, batch): + images, label_a, label_b, lam = self.parse_batch_train(batch) + output = self.model(images) + loss = lam * F.cross_entropy( + output, label_a + ) + (1-lam) * F.cross_entropy(output, label_b) + self.model_backward_and_update(loss) + + loss_summary = { + "loss": loss.item(), + "acc": compute_accuracy(output, label_a)[0].item() + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch): + images = batch["img"] + target = batch["label"] + domain = batch["domain"] + images = images.to(self.device) + target = target.to(self.device) + domain = domain.to(self.device) + images, target_a, target_b, lam = self.domain_mix( + images, target, domain + ) + return images, target_a, target_b, lam + + def domain_mix(self, x, target, domain): + lam = ( + self.dist_beta.rsample((1, )) + if self.alpha > 0 else torch.tensor(1) + ).to(x.device) + + # random shuffle + perm = torch.randperm(x.size(0), dtype=torch.int64, device=x.device) + if self.mix_type == "crossdomain": + domain_list = torch.unique(domain) + if len(domain_list) > 1: + for idx in domain_list: + cnt_a = torch.sum(domain == idx) + idx_b = (domain != idx).nonzero().squeeze(-1) + cnt_b = idx_b.shape[0] + perm_b = torch.ones(cnt_b).multinomial( + num_samples=cnt_a, replacement=bool(cnt_a > cnt_b) + ) + perm[domain == idx] = idx_b[perm_b] + elif self.mix_type != "random": + raise NotImplementedError( + f"Chooses {'random', 'crossdomain'}, but got {self.mix_type}." + ) + mixed_x = lam*x + (1-lam) * x[perm, :] + target_a, target_b = target, target[perm] + return mixed_x, target_a, target_b, lam diff --git a/Dassl.pytorch/dassl/engine/dg/vanilla.py b/Dassl.pytorch/dassl/engine/dg/vanilla.py new file mode 100644 index 0000000000000000000000000000000000000000..e35f30a1d32935c3440f16e78eed4517fee56021 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/dg/vanilla.py @@ -0,0 +1,35 @@ +from torch.nn import functional as F + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy + + +@TRAINER_REGISTRY.register() +class Vanilla(TrainerX): + """Vanilla model. + + A.k.a. Empirical Risk Minimization, or ERM. 
+ """ + + def forward_backward(self, batch): + input, target = self.parse_batch_train(batch) + output = self.model(input) + loss = F.cross_entropy(output, target) + self.model_backward_and_update(loss) + + loss_summary = { + "loss": loss.item(), + "acc": compute_accuracy(output, target)[0].item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch): + input = batch["img"] + target = batch["label"] + input = input.to(self.device) + target = target.to(self.device) + return input, target diff --git a/Dassl.pytorch/dassl/engine/ssl/__init__.py b/Dassl.pytorch/dassl/engine/ssl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..46fa781f701ad9b228317ffe69a741fc37799e53 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/ssl/__init__.py @@ -0,0 +1,5 @@ +from .entmin import EntMin +from .fixmatch import FixMatch +from .mixmatch import MixMatch +from .mean_teacher import MeanTeacher +from .sup_baseline import SupBaseline diff --git a/Dassl.pytorch/dassl/engine/ssl/entmin.py b/Dassl.pytorch/dassl/engine/ssl/entmin.py new file mode 100644 index 0000000000000000000000000000000000000000..a17186a85eeceefb3537ac243ed88e4421e77bf4 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/ssl/entmin.py @@ -0,0 +1,41 @@ +import torch +from torch.nn import functional as F + +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.metrics import compute_accuracy + + +@TRAINER_REGISTRY.register() +class EntMin(TrainerXU): + """Entropy Minimization. + + http://papers.nips.cc/paper/2740-semi-supervised-learning-by-entropy-minimization.pdf. + """ + + def __init__(self, cfg): + super().__init__(cfg) + self.lmda = cfg.TRAINER.ENTMIN.LMDA + + def forward_backward(self, batch_x, batch_u): + input_x, label_x, input_u = self.parse_batch_train(batch_x, batch_u) + + output_x = self.model(input_x) + loss_x = F.cross_entropy(output_x, label_x) + + output_u = F.softmax(self.model(input_u), 1) + loss_u = (-output_u * torch.log(output_u + 1e-5)).sum(1).mean() + + loss = loss_x + loss_u * self.lmda + + self.model_backward_and_update(loss) + + loss_summary = { + "loss_x": loss_x.item(), + "acc_x": compute_accuracy(output_x, label_x)[0].item(), + "loss_u": loss_u.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary diff --git a/Dassl.pytorch/dassl/engine/ssl/fixmatch.py b/Dassl.pytorch/dassl/engine/ssl/fixmatch.py new file mode 100644 index 0000000000000000000000000000000000000000..be6001f8d98a2c4193eeb6257c7859aaff26d673 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/ssl/fixmatch.py @@ -0,0 +1,112 @@ +import torch +from torch.nn import functional as F + +from dassl.data import DataManager +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.metrics import compute_accuracy +from dassl.data.transforms import build_transform + + +@TRAINER_REGISTRY.register() +class FixMatch(TrainerXU): + """FixMatch: Simplifying Semi-Supervised Learning with + Consistency and Confidence. + + https://arxiv.org/abs/2001.07685. 
+ """ + + def __init__(self, cfg): + super().__init__(cfg) + self.weight_u = cfg.TRAINER.FIXMATCH.WEIGHT_U + self.conf_thre = cfg.TRAINER.FIXMATCH.CONF_THRE + + def check_cfg(self, cfg): + assert len(cfg.TRAINER.FIXMATCH.STRONG_TRANSFORMS) > 0 + + def build_data_loader(self): + cfg = self.cfg + tfm_train = build_transform(cfg, is_train=True) + custom_tfm_train = [tfm_train] + choices = cfg.TRAINER.FIXMATCH.STRONG_TRANSFORMS + tfm_train_strong = build_transform(cfg, is_train=True, choices=choices) + custom_tfm_train += [tfm_train_strong] + self.dm = DataManager(self.cfg, custom_tfm_train=custom_tfm_train) + self.train_loader_x = self.dm.train_loader_x + self.train_loader_u = self.dm.train_loader_u + self.val_loader = self.dm.val_loader + self.test_loader = self.dm.test_loader + self.num_classes = self.dm.num_classes + + def assess_y_pred_quality(self, y_pred, y_true, mask): + n_masked_correct = (y_pred.eq(y_true).float() * mask).sum() + acc_thre = n_masked_correct / (mask.sum() + 1e-5) + acc_raw = y_pred.eq(y_true).sum() / y_pred.numel() # raw accuracy + keep_rate = mask.sum() / mask.numel() + output = { + "acc_thre": acc_thre, + "acc_raw": acc_raw, + "keep_rate": keep_rate + } + return output + + def forward_backward(self, batch_x, batch_u): + parsed_data = self.parse_batch_train(batch_x, batch_u) + input_x, input_x2, label_x, input_u, input_u2, label_u = parsed_data + input_u = torch.cat([input_x, input_u], 0) + input_u2 = torch.cat([input_x2, input_u2], 0) + n_x = input_x.size(0) + + # Generate pseudo labels + with torch.no_grad(): + output_u = F.softmax(self.model(input_u), 1) + max_prob, label_u_pred = output_u.max(1) + mask_u = (max_prob >= self.conf_thre).float() + + # Evaluate pseudo labels' accuracy + y_u_pred_stats = self.assess_y_pred_quality( + label_u_pred[n_x:], label_u, mask_u[n_x:] + ) + + # Supervised loss + output_x = self.model(input_x) + loss_x = F.cross_entropy(output_x, label_x) + + # Unsupervised loss + output_u = self.model(input_u2) + loss_u = F.cross_entropy(output_u, label_u_pred, reduction="none") + loss_u = (loss_u * mask_u).mean() + + loss = loss_x + loss_u * self.weight_u + self.model_backward_and_update(loss) + + loss_summary = { + "loss_x": loss_x.item(), + "acc_x": compute_accuracy(output_x, label_x)[0].item(), + "loss_u": loss_u.item(), + "y_u_pred_acc_raw": y_u_pred_stats["acc_raw"], + "y_u_pred_acc_thre": y_u_pred_stats["acc_thre"], + "y_u_pred_keep": y_u_pred_stats["keep_rate"], + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch_x, batch_u): + input_x = batch_x["img"] + input_x2 = batch_x["img2"] + label_x = batch_x["label"] + input_u = batch_u["img"] + input_u2 = batch_u["img2"] + # label_u is used only for evaluating pseudo labels' accuracy + label_u = batch_u["label"] + + input_x = input_x.to(self.device) + input_x2 = input_x2.to(self.device) + label_x = label_x.to(self.device) + input_u = input_u.to(self.device) + input_u2 = input_u2.to(self.device) + label_u = label_u.to(self.device) + + return input_x, input_x2, label_x, input_u, input_u2, label_u diff --git a/Dassl.pytorch/dassl/engine/ssl/mean_teacher.py b/Dassl.pytorch/dassl/engine/ssl/mean_teacher.py new file mode 100644 index 0000000000000000000000000000000000000000..054dc490f7984d816c5e85ce695c33a9cc0b0b7b --- /dev/null +++ b/Dassl.pytorch/dassl/engine/ssl/mean_teacher.py @@ -0,0 +1,54 @@ +import copy +from torch.nn import functional as F + +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from 
dassl.metrics import compute_accuracy +from dassl.modeling.ops.utils import sigmoid_rampup, ema_model_update + + +@TRAINER_REGISTRY.register() +class MeanTeacher(TrainerXU): + """Mean teacher. + + https://arxiv.org/abs/1703.01780. + """ + + def __init__(self, cfg): + super().__init__(cfg) + self.weight_u = cfg.TRAINER.MEANTEACHER.WEIGHT_U + self.ema_alpha = cfg.TRAINER.MEANTEACHER.EMA_ALPHA + self.rampup = cfg.TRAINER.MEANTEACHER.RAMPUP + + self.teacher = copy.deepcopy(self.model) + self.teacher.train() + for param in self.teacher.parameters(): + param.requires_grad_(False) + + def forward_backward(self, batch_x, batch_u): + input_x, label_x, input_u = self.parse_batch_train(batch_x, batch_u) + + logit_x = self.model(input_x) + loss_x = F.cross_entropy(logit_x, label_x) + + target_u = F.softmax(self.teacher(input_u), 1) + prob_u = F.softmax(self.model(input_u), 1) + loss_u = ((prob_u - target_u)**2).sum(1).mean() + + weight_u = self.weight_u * sigmoid_rampup(self.epoch, self.rampup) + loss = loss_x + loss_u*weight_u + self.model_backward_and_update(loss) + + global_step = self.batch_idx + self.epoch * self.num_batches + ema_alpha = min(1 - 1 / (global_step+1), self.ema_alpha) + ema_model_update(self.model, self.teacher, ema_alpha) + + loss_summary = { + "loss_x": loss_x.item(), + "acc_x": compute_accuracy(logit_x, label_x)[0].item(), + "loss_u": loss_u.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary diff --git a/Dassl.pytorch/dassl/engine/ssl/mixmatch.py b/Dassl.pytorch/dassl/engine/ssl/mixmatch.py new file mode 100644 index 0000000000000000000000000000000000000000..6bb24e165ea8aba29fcadc720bb222b82df0f2f0 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/ssl/mixmatch.py @@ -0,0 +1,98 @@ +import torch +from torch.nn import functional as F + +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.modeling.ops import mixup +from dassl.modeling.ops.utils import ( + sharpen_prob, create_onehot, linear_rampup, shuffle_index +) + + +@TRAINER_REGISTRY.register() +class MixMatch(TrainerXU): + """MixMatch: A Holistic Approach to Semi-Supervised Learning. + + https://arxiv.org/abs/1905.02249. 
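+
+ In this implementation the predictions over the K augmentations of each
+ unlabeled image are averaged and sharpened with TEMP, labeled and
+ unlabeled batches are then mixed with MixUp (Beta(MIXUP_BETA)), and the
+ unlabeled loss weight is ramped up linearly to WEIGHT_U.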
+ """ + + def __init__(self, cfg): + super().__init__(cfg) + self.weight_u = cfg.TRAINER.MIXMATCH.WEIGHT_U + self.temp = cfg.TRAINER.MIXMATCH.TEMP + self.beta = cfg.TRAINER.MIXMATCH.MIXUP_BETA + self.rampup = cfg.TRAINER.MIXMATCH.RAMPUP + + def check_cfg(self, cfg): + assert cfg.DATALOADER.K_TRANSFORMS > 1 + + def forward_backward(self, batch_x, batch_u): + input_x, label_x, input_u = self.parse_batch_train(batch_x, batch_u) + num_x = input_x.shape[0] + + global_step = self.batch_idx + self.epoch * self.num_batches + weight_u = self.weight_u * linear_rampup(global_step, self.rampup) + + # Generate pseudo-label for unlabeled data + with torch.no_grad(): + output_u = 0 + for input_ui in input_u: + output_ui = F.softmax(self.model(input_ui), 1) + output_u += output_ui + output_u /= len(input_u) + label_u = sharpen_prob(output_u, self.temp) + label_u = [label_u] * len(input_u) + label_u = torch.cat(label_u, 0) + input_u = torch.cat(input_u, 0) + + # Combine and shuffle labeled and unlabeled data + input_xu = torch.cat([input_x, input_u], 0) + label_xu = torch.cat([label_x, label_u], 0) + input_xu, label_xu = shuffle_index(input_xu, label_xu) + + # Mixup + input_x, label_x = mixup( + input_x, + input_xu[:num_x], + label_x, + label_xu[:num_x], + self.beta, + preserve_order=True, + ) + + input_u, label_u = mixup( + input_u, + input_xu[num_x:], + label_u, + label_xu[num_x:], + self.beta, + preserve_order=True, + ) + + # Compute losses + output_x = F.softmax(self.model(input_x), 1) + loss_x = (-label_x * torch.log(output_x + 1e-5)).sum(1).mean() + + output_u = F.softmax(self.model(input_u), 1) + loss_u = ((label_u - output_u)**2).mean() + + loss = loss_x + loss_u*weight_u + self.model_backward_and_update(loss) + + loss_summary = {"loss_x": loss_x.item(), "loss_u": loss_u.item()} + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch_x, batch_u): + input_x = batch_x["img"][0] + label_x = batch_x["label"] + label_x = create_onehot(label_x, self.num_classes) + input_u = batch_u["img"] + + input_x = input_x.to(self.device) + label_x = label_x.to(self.device) + input_u = [input_ui.to(self.device) for input_ui in input_u] + + return input_x, label_x, input_u diff --git a/Dassl.pytorch/dassl/engine/ssl/sup_baseline.py b/Dassl.pytorch/dassl/engine/ssl/sup_baseline.py new file mode 100644 index 0000000000000000000000000000000000000000..b2f5228e2cd640dad711a8aefb983ec092050627 --- /dev/null +++ b/Dassl.pytorch/dassl/engine/ssl/sup_baseline.py @@ -0,0 +1,32 @@ +from torch.nn import functional as F + +from dassl.engine import TRAINER_REGISTRY, TrainerXU +from dassl.metrics import compute_accuracy + + +@TRAINER_REGISTRY.register() +class SupBaseline(TrainerXU): + """Supervised Baseline.""" + + def forward_backward(self, batch_x, batch_u): + input, label = self.parse_batch_train(batch_x, batch_u) + output = self.model(input) + loss = F.cross_entropy(output, label) + self.model_backward_and_update(loss) + + loss_summary = { + "loss": loss.item(), + "acc": compute_accuracy(output, label)[0].item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch_x, batch_u): + input = batch_x["img"] + label = batch_x["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label diff --git a/Dassl.pytorch/dassl/engine/trainer.py b/Dassl.pytorch/dassl/engine/trainer.py new file mode 100644 index 
0000000000000000000000000000000000000000..57bec14772b140a89db7591ad5bf5be343edf29c --- /dev/null +++ b/Dassl.pytorch/dassl/engine/trainer.py @@ -0,0 +1,637 @@ +import time +import numpy as np +import os.path as osp +import datetime +from collections import OrderedDict +import torch +import torch.nn as nn +from tqdm import tqdm +from torch.utils.tensorboard import SummaryWriter + +from dassl.data import DataManager +from dassl.optim import build_optimizer, build_lr_scheduler +from dassl.utils import ( + MetricMeter, AverageMeter, tolist_if_not, count_num_param, load_checkpoint, + save_checkpoint, mkdir_if_missing, resume_from_checkpoint, + load_pretrained_weights +) +from dassl.modeling import build_head, build_backbone +from dassl.evaluation import build_evaluator + + +class SimpleNet(nn.Module): + """A simple neural network composed of a CNN backbone + and optionally a head such as mlp for classification. + """ + + def __init__(self, cfg, model_cfg, num_classes, **kwargs): + super().__init__() + self.backbone = build_backbone( + model_cfg.BACKBONE.NAME, + verbose=cfg.VERBOSE, + pretrained=model_cfg.BACKBONE.PRETRAINED, + **kwargs, + ) + fdim = self.backbone.out_features + + self.head = None + if model_cfg.HEAD.NAME and model_cfg.HEAD.HIDDEN_LAYERS: + self.head = build_head( + model_cfg.HEAD.NAME, + verbose=cfg.VERBOSE, + in_features=fdim, + hidden_layers=model_cfg.HEAD.HIDDEN_LAYERS, + activation=model_cfg.HEAD.ACTIVATION, + bn=model_cfg.HEAD.BN, + dropout=model_cfg.HEAD.DROPOUT, + **kwargs, + ) + fdim = self.head.out_features + + self.classifier = None + if num_classes > 0: + self.classifier = nn.Linear(fdim, num_classes) + + self._fdim = fdim + + @property + def fdim(self): + return self._fdim + + def forward(self, x, return_feature=False): + f = self.backbone(x) + if self.head is not None: + f = self.head(f) + + if self.classifier is None: + return f + + y = self.classifier(f) + + if return_feature: + return y, f + + return y + + +class TrainerBase: + """Base class for iterative trainer.""" + + def __init__(self): + self._models = OrderedDict() + self._optims = OrderedDict() + self._scheds = OrderedDict() + self._writer = None + + def register_model(self, name="model", model=None, optim=None, sched=None): + if self.__dict__.get("_models") is None: + raise AttributeError( + "Cannot assign model before super().__init__() call" + ) + + if self.__dict__.get("_optims") is None: + raise AttributeError( + "Cannot assign optim before super().__init__() call" + ) + + if self.__dict__.get("_scheds") is None: + raise AttributeError( + "Cannot assign sched before super().__init__() call" + ) + + assert name not in self._models, "Found duplicate model names" + + self._models[name] = model + self._optims[name] = optim + self._scheds[name] = sched + + def get_model_names(self, names=None): + names_real = list(self._models.keys()) + if names is not None: + names = tolist_if_not(names) + for name in names: + assert name in names_real + return names + else: + return names_real + + def save_model( + self, epoch, directory, is_best=False, val_result=None, model_name="" + ): + names = self.get_model_names() + + for name in names: + model_dict = self._models[name].state_dict() + + optim_dict = None + if self._optims[name] is not None: + optim_dict = self._optims[name].state_dict() + + sched_dict = None + if self._scheds[name] is not None: + sched_dict = self._scheds[name].state_dict() + + save_checkpoint( + { + "state_dict": model_dict, + "epoch": epoch + 1, + "optimizer": optim_dict, + "scheduler": 
sched_dict, + "val_result": val_result + }, + osp.join(directory, name), + is_best=is_best, + model_name=model_name, + ) + + def resume_model_if_exist(self, directory): + names = self.get_model_names() + file_missing = False + + for name in names: + path = osp.join(directory, name) + if not osp.exists(path): + file_missing = True + break + + if file_missing: + print("No checkpoint found, train from scratch") + return 0 + + print(f"Found checkpoint at {directory} (will resume training)") + + for name in names: + path = osp.join(directory, name) + start_epoch = resume_from_checkpoint( + path, self._models[name], self._optims[name], + self._scheds[name] + ) + + return start_epoch + + def load_model(self, directory, epoch=None): + if not directory: + print( + "Note that load_model() is skipped as no pretrained " + "model is given (ignore this if it's done on purpose)" + ) + return + + names = self.get_model_names() + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError(f"No model at {model_path}") + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + val_result = checkpoint["val_result"] + print( + f"Load {model_path} to {name} (epoch={epoch}, val_result={val_result:.1f})" + ) + self._models[name].load_state_dict(state_dict) + + def set_model_mode(self, mode="train", names=None): + names = self.get_model_names(names) + + for name in names: + if mode == "train": + self._models[name].train() + elif mode in ["test", "eval"]: + self._models[name].eval() + else: + raise KeyError + + def update_lr(self, names=None): + names = self.get_model_names(names) + + for name in names: + if self._scheds[name] is not None: + self._scheds[name].step() + + def detect_anomaly(self, loss): + if not torch.isfinite(loss).all(): + raise FloatingPointError("Loss is infinite or NaN!") + + def init_writer(self, log_dir): + if self.__dict__.get("_writer") is None or self._writer is None: + print(f"Initialize tensorboard (log_dir={log_dir})") + self._writer = SummaryWriter(log_dir=log_dir) + + def close_writer(self): + if self._writer is not None: + self._writer.close() + + def write_scalar(self, tag, scalar_value, global_step=None): + if self._writer is None: + # Do nothing if writer is not initialized + # Note that writer is only used when training is needed + pass + else: + self._writer.add_scalar(tag, scalar_value, global_step) + + def train(self, start_epoch, max_epoch): + """Generic training loops.""" + self.start_epoch = start_epoch + self.max_epoch = max_epoch + + self.before_train() + for self.epoch in range(self.start_epoch, self.max_epoch): + self.before_epoch() + self.run_epoch() + self.after_epoch() + self.after_train() + + def before_train(self): + pass + + def after_train(self): + pass + + def before_epoch(self): + pass + + def after_epoch(self): + pass + + def run_epoch(self): + raise NotImplementedError + + def test(self): + raise NotImplementedError + + def parse_batch_train(self, batch): + raise NotImplementedError + + def parse_batch_test(self, batch): + raise NotImplementedError + + def forward_backward(self, batch): + raise NotImplementedError + + def model_inference(self, input): + raise NotImplementedError + + def model_zero_grad(self, names=None): + names = self.get_model_names(names) + for name in names: + 
if self._optims[name] is not None: + self._optims[name].zero_grad() + + def model_backward(self, loss): + self.detect_anomaly(loss) + loss.backward() + + def model_update(self, names=None): + names = self.get_model_names(names) + for name in names: + if self._optims[name] is not None: + self._optims[name].step() + + def model_backward_and_update(self, loss, names=None): + self.model_zero_grad(names) + self.model_backward(loss) + self.model_update(names) + + +class SimpleTrainer(TrainerBase): + """A simple trainer class implementing generic functions.""" + + def __init__(self, cfg): + super().__init__() + self.check_cfg(cfg) + + if torch.cuda.is_available() and cfg.USE_CUDA: + self.device = torch.device("cuda") + else: + self.device = torch.device("cpu") + + # Save as attributes some frequently used variables + self.start_epoch = self.epoch = 0 + self.max_epoch = cfg.OPTIM.MAX_EPOCH + self.output_dir = cfg.OUTPUT_DIR + + self.cfg = cfg + self.build_data_loader() + self.build_model() + self.evaluator = build_evaluator(cfg, lab2cname=self.lab2cname) + self.best_result = -np.inf + + def check_cfg(self, cfg): + """Check whether some variables are set correctly for + the trainer (optional). + + For example, a trainer might require a particular sampler + for training such as 'RandomDomainSampler', so it is good + to do the checking: + + assert cfg.DATALOADER.SAMPLER_TRAIN == 'RandomDomainSampler' + """ + pass + + def build_data_loader(self): + """Create essential data-related attributes. + + A re-implementation of this method must create the + same attributes (self.dm is optional). + """ + dm = DataManager(self.cfg) + + self.train_loader_x = dm.train_loader_x + self.train_loader_u = dm.train_loader_u # optional, can be None + self.val_loader = dm.val_loader # optional, can be None + self.test_loader = dm.test_loader + + self.num_classes = dm.num_classes + self.num_source_domains = dm.num_source_domains + self.lab2cname = dm.lab2cname # dict {label: classname} + + self.dm = dm + + def build_model(self): + """Build and register model. + + The default builds a classification model along with its + optimizer and scheduler. + + Custom trainers can re-implement this method if necessary. 
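+
+        A minimal sketch of a custom re-implementation (MyNet is an
+        illustrative placeholder, not part of this package):
+
+            def build_model(self):
+                cfg = self.cfg
+                self.model = MyNet(cfg, cfg.MODEL, self.num_classes)
+                self.model.to(self.device)
+                self.optim = build_optimizer(self.model, cfg.OPTIM)
+                self.sched = build_lr_scheduler(self.optim, cfg.OPTIM)
+                self.register_model("model", self.model, self.optim, self.sched)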
+ """ + cfg = self.cfg + + print("Building model") + self.model = SimpleNet(cfg, cfg.MODEL, self.num_classes) + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model, cfg.MODEL.INIT_WEIGHTS) + self.model.to(self.device) + print(f"# params: {count_num_param(self.model):,}") + self.optim = build_optimizer(self.model, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("model", self.model, self.optim, self.sched) + + device_count = torch.cuda.device_count() + if device_count > 1: + print(f"Detected {device_count} GPUs (use nn.DataParallel)") + self.model = nn.DataParallel(self.model) + + def train(self): + super().train(self.start_epoch, self.max_epoch) + + def before_train(self): + directory = self.cfg.OUTPUT_DIR + if self.cfg.RESUME: + directory = self.cfg.RESUME + self.start_epoch = self.resume_model_if_exist(directory) + + # Initialize summary writer + writer_dir = osp.join(self.output_dir, "tensorboard") + mkdir_if_missing(writer_dir) + self.init_writer(writer_dir) + + # Remember the starting time (for computing the elapsed time) + self.time_start = time.time() + + def after_train(self): + print("Finish training") + + do_test = not self.cfg.TEST.NO_TEST + if do_test: + if self.cfg.TEST.FINAL_MODEL == "best_val": + print("Deploy the model with the best val performance") + self.load_model(self.output_dir) + else: + print("Deploy the last-epoch model") + self.test() + + # Show elapsed time + elapsed = round(time.time() - self.time_start) + elapsed = str(datetime.timedelta(seconds=elapsed)) + print(f"Elapsed: {elapsed}") + + # Close writer + self.close_writer() + + def after_epoch(self): + last_epoch = (self.epoch + 1) == self.max_epoch + do_test = not self.cfg.TEST.NO_TEST + meet_checkpoint_freq = ( + (self.epoch + 1) % self.cfg.TRAIN.CHECKPOINT_FREQ == 0 + if self.cfg.TRAIN.CHECKPOINT_FREQ > 0 else False + ) + + if do_test and self.cfg.TEST.FINAL_MODEL == "best_val": + curr_result = self.test(split="val") + is_best = curr_result > self.best_result + if is_best: + self.best_result = curr_result + self.save_model( + self.epoch, + self.output_dir, + val_result=curr_result, + model_name="model-best.pth.tar" + ) + + if meet_checkpoint_freq or last_epoch: + self.save_model(self.epoch, self.output_dir) + + @torch.no_grad() + def test(self, split=None): + """A generic testing pipeline.""" + self.set_model_mode("eval") + self.evaluator.reset() + + if split is None: + split = self.cfg.TEST.SPLIT + + if split == "val" and self.val_loader is not None: + data_loader = self.val_loader + else: + split = "test" # in case val_loader is None + data_loader = self.test_loader + + print(f"Evaluate on the *{split}* set") + + for batch_idx, batch in enumerate(tqdm(data_loader)): + input, label = self.parse_batch_test(batch) + output = self.model_inference(input) + self.evaluator.process(output, label) + + results = self.evaluator.evaluate() + + for k, v in results.items(): + tag = f"{split}/{k}" + self.write_scalar(tag, v, self.epoch) + + return list(results.values())[0] + + def model_inference(self, input): + return self.model(input) + + def parse_batch_test(self, batch): + input = batch["img"] + label = batch["label"] + + input = input.to(self.device) + label = label.to(self.device) + + return input, label + + def get_current_lr(self, names=None): + names = self.get_model_names(names) + name = names[0] + return self._optims[name].param_groups[0]["lr"] + + +class TrainerXU(SimpleTrainer): + """A base trainer using both labeled and unlabeled data. 
+ + In the context of domain adaptation, labeled and unlabeled data + come from source and target domains respectively. + + When it comes to semi-supervised learning, all data comes from the + same domain. + """ + + def run_epoch(self): + self.set_model_mode("train") + losses = MetricMeter() + batch_time = AverageMeter() + data_time = AverageMeter() + + # Decide to iterate over labeled or unlabeled dataset + len_train_loader_x = len(self.train_loader_x) + len_train_loader_u = len(self.train_loader_u) + if self.cfg.TRAIN.COUNT_ITER == "train_x": + self.num_batches = len_train_loader_x + elif self.cfg.TRAIN.COUNT_ITER == "train_u": + self.num_batches = len_train_loader_u + elif self.cfg.TRAIN.COUNT_ITER == "smaller_one": + self.num_batches = min(len_train_loader_x, len_train_loader_u) + else: + raise ValueError + + train_loader_x_iter = iter(self.train_loader_x) + train_loader_u_iter = iter(self.train_loader_u) + + end = time.time() + for self.batch_idx in range(self.num_batches): + try: + batch_x = next(train_loader_x_iter) + except StopIteration: + train_loader_x_iter = iter(self.train_loader_x) + batch_x = next(train_loader_x_iter) + + try: + batch_u = next(train_loader_u_iter) + except StopIteration: + train_loader_u_iter = iter(self.train_loader_u) + batch_u = next(train_loader_u_iter) + + data_time.update(time.time() - end) + loss_summary = self.forward_backward(batch_x, batch_u) + batch_time.update(time.time() - end) + losses.update(loss_summary) + + meet_freq = (self.batch_idx + 1) % self.cfg.TRAIN.PRINT_FREQ == 0 + only_few_batches = self.num_batches < self.cfg.TRAIN.PRINT_FREQ + if meet_freq or only_few_batches: + nb_remain = 0 + nb_remain += self.num_batches - self.batch_idx - 1 + nb_remain += ( + self.max_epoch - self.epoch - 1 + ) * self.num_batches + eta_seconds = batch_time.avg * nb_remain + eta = str(datetime.timedelta(seconds=int(eta_seconds))) + + info = [] + info += [f"epoch [{self.epoch + 1}/{self.max_epoch}]"] + info += [f"batch [{self.batch_idx + 1}/{self.num_batches}]"] + info += [f"time {batch_time.val:.3f} ({batch_time.avg:.3f})"] + info += [f"data {data_time.val:.3f} ({data_time.avg:.3f})"] + info += [f"{losses}"] + info += [f"lr {self.get_current_lr():.4e}"] + info += [f"eta {eta}"] + print(" ".join(info)) + + n_iter = self.epoch * self.num_batches + self.batch_idx + for name, meter in losses.meters.items(): + self.write_scalar("train/" + name, meter.avg, n_iter) + self.write_scalar("train/lr", self.get_current_lr(), n_iter) + + end = time.time() + + def parse_batch_train(self, batch_x, batch_u): + input_x = batch_x["img"] + label_x = batch_x["label"] + input_u = batch_u["img"] + + input_x = input_x.to(self.device) + label_x = label_x.to(self.device) + input_u = input_u.to(self.device) + + return input_x, label_x, input_u + + +class TrainerX(SimpleTrainer): + """A base trainer using labeled data only.""" + + def run_epoch(self): + self.set_model_mode("train") + losses = MetricMeter() + batch_time = AverageMeter() + data_time = AverageMeter() + self.num_batches = len(self.train_loader_x) + + end = time.time() + for self.batch_idx, batch in enumerate(self.train_loader_x): + data_time.update(time.time() - end) + loss_summary = self.forward_backward(batch) + batch_time.update(time.time() - end) + losses.update(loss_summary) + + meet_freq = (self.batch_idx + 1) % self.cfg.TRAIN.PRINT_FREQ == 0 + only_few_batches = self.num_batches < self.cfg.TRAIN.PRINT_FREQ + if meet_freq or only_few_batches: + nb_remain = 0 + nb_remain += self.num_batches - self.batch_idx - 1 + 
nb_remain += ( + self.max_epoch - self.epoch - 1 + ) * self.num_batches + eta_seconds = batch_time.avg * nb_remain + eta = str(datetime.timedelta(seconds=int(eta_seconds))) + + info = [] + info += [f"epoch [{self.epoch + 1}/{self.max_epoch}]"] + info += [f"batch [{self.batch_idx + 1}/{self.num_batches}]"] + info += [f"time {batch_time.val:.3f} ({batch_time.avg:.3f})"] + info += [f"data {data_time.val:.3f} ({data_time.avg:.3f})"] + info += [f"{losses}"] + info += [f"lr {self.get_current_lr():.4e}"] + info += [f"eta {eta}"] + print(" ".join(info)) + + n_iter = self.epoch * self.num_batches + self.batch_idx + for name, meter in losses.meters.items(): + self.write_scalar("train/" + name, meter.avg, n_iter) + self.write_scalar("train/lr", self.get_current_lr(), n_iter) + + end = time.time() + + def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + domain = batch["domain"] + + input = input.to(self.device) + label = label.to(self.device) + domain = domain.to(self.device) + + return input, label, domain diff --git a/Dassl.pytorch/dassl/evaluation/__init__.py b/Dassl.pytorch/dassl/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..59a024f5a45d183f8505eaa24d4e59e1c28f4f60 --- /dev/null +++ b/Dassl.pytorch/dassl/evaluation/__init__.py @@ -0,0 +1,3 @@ +from .build import build_evaluator, EVALUATOR_REGISTRY # isort:skip + +from .evaluator import EvaluatorBase, Classification diff --git a/Dassl.pytorch/dassl/evaluation/build.py b/Dassl.pytorch/dassl/evaluation/build.py new file mode 100644 index 0000000000000000000000000000000000000000..3132a3f401023615fb7bf6f31171d0360313ea8f --- /dev/null +++ b/Dassl.pytorch/dassl/evaluation/build.py @@ -0,0 +1,11 @@ +from dassl.utils import Registry, check_availability + +EVALUATOR_REGISTRY = Registry("EVALUATOR") + + +def build_evaluator(cfg, **kwargs): + avai_evaluators = EVALUATOR_REGISTRY.registered_names() + check_availability(cfg.TEST.EVALUATOR, avai_evaluators) + if cfg.VERBOSE: + print("Loading evaluator: {}".format(cfg.TEST.EVALUATOR)) + return EVALUATOR_REGISTRY.get(cfg.TEST.EVALUATOR)(cfg, **kwargs) diff --git a/Dassl.pytorch/dassl/evaluation/evaluator.py b/Dassl.pytorch/dassl/evaluation/evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..eef37975e240390e0658d60e9923cc2816f2d098 --- /dev/null +++ b/Dassl.pytorch/dassl/evaluation/evaluator.py @@ -0,0 +1,125 @@ +import numpy as np +import os.path as osp +from collections import OrderedDict, defaultdict +import torch +from sklearn.metrics import f1_score, confusion_matrix + +from .build import EVALUATOR_REGISTRY + + +class EvaluatorBase: + """Base evaluator.""" + + def __init__(self, cfg): + self.cfg = cfg + + def reset(self): + raise NotImplementedError + + def process(self, mo, gt): + raise NotImplementedError + + def evaluate(self): + raise NotImplementedError + + +@EVALUATOR_REGISTRY.register() +class Classification(EvaluatorBase): + """Evaluator for classification.""" + + def __init__(self, cfg, lab2cname=None, **kwargs): + super().__init__(cfg) + self._lab2cname = lab2cname + self._correct = 0 + self._total = 0 + self._per_class_res = None + self._y_true = [] + self._y_pred = [] + if cfg.TEST.PER_CLASS_RESULT: + assert lab2cname is not None + self._per_class_res = defaultdict(list) + + def reset(self): + self._correct = 0 + self._total = 0 + self._y_true = [] + self._y_pred = [] + if self._per_class_res is not None: + self._per_class_res = defaultdict(list) + + def process(self, mo, gt): + # mo 
(torch.Tensor): model output [batch, num_classes] + # gt (torch.LongTensor): ground truth [batch] + pred = mo.max(1)[1] + matches = pred.eq(gt).float() + self._correct += int(matches.sum().item()) + self._total += gt.shape[0] + + self._y_true.extend(gt.data.cpu().numpy().tolist()) + self._y_pred.extend(pred.data.cpu().numpy().tolist()) + + if self._per_class_res is not None: + for i, label in enumerate(gt): + label = label.item() + matches_i = int(matches[i].item()) + self._per_class_res[label].append(matches_i) + + def evaluate(self): + results = OrderedDict() + acc = 100.0 * self._correct / self._total + err = 100.0 - acc + macro_f1 = 100.0 * f1_score( + self._y_true, + self._y_pred, + average="macro", + labels=np.unique(self._y_true) + ) + + # The first value will be returned by trainer.test() + results["accuracy"] = acc + results["error_rate"] = err + results["macro_f1"] = macro_f1 + + print( + "=> result\n" + f"* total: {self._total:,}\n" + f"* correct: {self._correct:,}\n" + f"* accuracy: {acc:.1f}%\n" + f"* error: {err:.1f}%\n" + f"* macro_f1: {macro_f1:.1f}%" + ) + + if self._per_class_res is not None: + labels = list(self._per_class_res.keys()) + labels.sort() + + print("=> per-class result") + accs = [] + + for label in labels: + classname = self._lab2cname[label] + res = self._per_class_res[label] + correct = sum(res) + total = len(res) + acc = 100.0 * correct / total + accs.append(acc) + print( + f"* class: {label} ({classname})\t" + f"total: {total:,}\t" + f"correct: {correct:,}\t" + f"acc: {acc:.1f}%" + ) + mean_acc = np.mean(accs) + print(f"* average: {mean_acc:.1f}%") + + results["perclass_accuracy"] = mean_acc + + if self.cfg.TEST.COMPUTE_CMAT: + cmat = confusion_matrix( + self._y_true, self._y_pred, normalize="true" + ) + save_path = osp.join(self.cfg.OUTPUT_DIR, "cmat.pt") + torch.save(cmat, save_path) + print(f"Confusion matrix is saved to {save_path}") + + return results diff --git a/Dassl.pytorch/dassl/metrics/__init__.py b/Dassl.pytorch/dassl/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c2b37de85bc1b8efd3a10c64eb55978a54936f8b --- /dev/null +++ b/Dassl.pytorch/dassl/metrics/__init__.py @@ -0,0 +1,4 @@ +from .accuracy import compute_accuracy +from .distance import ( + cosine_distance, compute_distance_matrix, euclidean_squared_distance +) diff --git a/Dassl.pytorch/dassl/metrics/accuracy.py b/Dassl.pytorch/dassl/metrics/accuracy.py new file mode 100644 index 0000000000000000000000000000000000000000..a8ed0ae5403904f81db3f034d43c0213c41b6f76 --- /dev/null +++ b/Dassl.pytorch/dassl/metrics/accuracy.py @@ -0,0 +1,30 @@ +def compute_accuracy(output, target, topk=(1, )): + """Computes the accuracy over the k top predictions for + the specified values of k. + + Args: + output (torch.Tensor): prediction matrix with shape (batch_size, num_classes). + target (torch.LongTensor): ground truth labels with shape (batch_size). + topk (tuple, optional): accuracy at top-k will be computed. For example, + topk=(1, 5) means accuracy at top-1 and top-5 will be computed. + + Returns: + list: accuracy at top-k. 
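+
+    Example (illustrative values):
+
+        >>> import torch
+        >>> output = torch.randn(4, 10)          # logits for 4 samples, 10 classes
+        >>> target = torch.tensor([1, 0, 3, 9])  # ground-truth labels
+        >>> top1, top5 = compute_accuracy(output, target, topk=(1, 5))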
+ """ + maxk = max(topk) + batch_size = target.size(0) + + if isinstance(output, (tuple, list)): + output = output[0] + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + acc = correct_k.mul_(100.0 / batch_size) + res.append(acc) + + return res diff --git a/Dassl.pytorch/dassl/metrics/distance.py b/Dassl.pytorch/dassl/metrics/distance.py new file mode 100644 index 0000000000000000000000000000000000000000..80568151a80f87337ca7c458dc2516546c5e42d1 --- /dev/null +++ b/Dassl.pytorch/dassl/metrics/distance.py @@ -0,0 +1,77 @@ +""" +Source: https://github.com/KaiyangZhou/deep-person-reid +""" +import torch +from torch.nn import functional as F + + +def compute_distance_matrix(input1, input2, metric="euclidean"): + """A wrapper function for computing distance matrix. + + Each input matrix has the shape (n_data, feature_dim). + + Args: + input1 (torch.Tensor): 2-D feature matrix. + input2 (torch.Tensor): 2-D feature matrix. + metric (str, optional): "euclidean" or "cosine". + Default is "euclidean". + + Returns: + torch.Tensor: distance matrix. + """ + # check input + assert isinstance(input1, torch.Tensor) + assert isinstance(input2, torch.Tensor) + assert input1.dim() == 2, "Expected 2-D tensor, but got {}-D".format( + input1.dim() + ) + assert input2.dim() == 2, "Expected 2-D tensor, but got {}-D".format( + input2.dim() + ) + assert input1.size(1) == input2.size(1) + + if metric == "euclidean": + distmat = euclidean_squared_distance(input1, input2) + elif metric == "cosine": + distmat = cosine_distance(input1, input2) + else: + raise ValueError( + "Unknown distance metric: {}. " + 'Please choose either "euclidean" or "cosine"'.format(metric) + ) + + return distmat + + +def euclidean_squared_distance(input1, input2): + """Computes euclidean squared distance. + + Args: + input1 (torch.Tensor): 2-D feature matrix. + input2 (torch.Tensor): 2-D feature matrix. + + Returns: + torch.Tensor: distance matrix. + """ + m, n = input1.size(0), input2.size(0) + mat1 = torch.pow(input1, 2).sum(dim=1, keepdim=True).expand(m, n) + mat2 = torch.pow(input2, 2).sum(dim=1, keepdim=True).expand(n, m).t() + distmat = mat1 + mat2 + distmat.addmm_(1, -2, input1, input2.t()) + return distmat + + +def cosine_distance(input1, input2): + """Computes cosine distance. + + Args: + input1 (torch.Tensor): 2-D feature matrix. + input2 (torch.Tensor): 2-D feature matrix. + + Returns: + torch.Tensor: distance matrix. 
+ """ + input1_normed = F.normalize(input1, p=2, dim=1) + input2_normed = F.normalize(input2, p=2, dim=1) + distmat = 1 - torch.mm(input1_normed, input2_normed.t()) + return distmat diff --git a/Dassl.pytorch/dassl/modeling/__init__.py b/Dassl.pytorch/dassl/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..88466b9b248c8ad2ebc01f4f949c8afa95b3836a --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/__init__.py @@ -0,0 +1,3 @@ +from .head import HEAD_REGISTRY, build_head +from .network import NETWORK_REGISTRY, build_network +from .backbone import BACKBONE_REGISTRY, Backbone, build_backbone diff --git a/Dassl.pytorch/dassl/modeling/backbone/__init__.py b/Dassl.pytorch/dassl/modeling/backbone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8e6dc684b0e276f8d7dd807b30b6a4ef5469c5d6 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/__init__.py @@ -0,0 +1,23 @@ +from .build import build_backbone, BACKBONE_REGISTRY # isort:skip +from .backbone import Backbone # isort:skip + +from .vgg import vgg16 +from .resnet import ( + resnet18, resnet34, resnet50, resnet101, resnet152, resnet18_ms_l1, + resnet50_ms_l1, resnet18_ms_l12, resnet50_ms_l12, resnet101_ms_l1, + resnet18_ms_l123, resnet50_ms_l123, resnet101_ms_l12, resnet101_ms_l123, + resnet18_efdmix_l1, resnet50_efdmix_l1, resnet18_efdmix_l12, + resnet50_efdmix_l12, resnet101_efdmix_l1, resnet18_efdmix_l123, + resnet50_efdmix_l123, resnet101_efdmix_l12, resnet101_efdmix_l123 +) +from .alexnet import alexnet +from .wide_resnet import wide_resnet_16_4, wide_resnet_28_2 +from .cnn_digitsdg import cnn_digitsdg +from .efficientnet import ( + efficientnet_b0, efficientnet_b1, efficientnet_b2, efficientnet_b3, + efficientnet_b4, efficientnet_b5, efficientnet_b6, efficientnet_b7 +) +from .resnet_dynamic import * +from .cnn_digitsingle import cnn_digitsingle +from .preact_resnet18 import preact_resnet18 +from .cnn_digit5_m3sda import cnn_digit5_m3sda diff --git a/Dassl.pytorch/dassl/modeling/backbone/alexnet.py b/Dassl.pytorch/dassl/modeling/backbone/alexnet.py new file mode 100644 index 0000000000000000000000000000000000000000..2daff24338251d1d182a6bf6332e001c5cee8b4f --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/alexnet.py @@ -0,0 +1,64 @@ +import torch +import torch.nn as nn +import torch.utils.model_zoo as model_zoo + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + +model_urls = { + "alexnet": "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth", +} + + +class AlexNet(Backbone): + + def __init__(self): + super().__init__() + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(64, 192, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(192, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + ) + self.avgpool = nn.AdaptiveAvgPool2d((6, 6)) + # Note that self.classifier outputs features rather than logits + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + ) + + self._out_features = 4096 + + def forward(self, x): + x = self.features(x) + x = self.avgpool(x) 
+ x = torch.flatten(x, 1) + return self.classifier(x) + + +def init_pretrained_weights(model, model_url): + pretrain_dict = model_zoo.load_url(model_url) + model.load_state_dict(pretrain_dict, strict=False) + + +@BACKBONE_REGISTRY.register() +def alexnet(pretrained=True, **kwargs): + model = AlexNet() + + if pretrained: + init_pretrained_weights(model, model_urls["alexnet"]) + + return model diff --git a/Dassl.pytorch/dassl/modeling/backbone/backbone.py b/Dassl.pytorch/dassl/modeling/backbone/backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..b544d945d12d49914a271dbc3861a46d72dd9e6c --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/backbone.py @@ -0,0 +1,17 @@ +import torch.nn as nn + + +class Backbone(nn.Module): + + def __init__(self): + super().__init__() + + def forward(self): + pass + + @property + def out_features(self): + """Output feature dimension.""" + if self.__dict__.get("_out_features") is None: + return None + return self._out_features diff --git a/Dassl.pytorch/dassl/modeling/backbone/build.py b/Dassl.pytorch/dassl/modeling/backbone/build.py new file mode 100644 index 0000000000000000000000000000000000000000..61f4e4fed70e48c3cc6ce98e7b372adf688b5102 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/build.py @@ -0,0 +1,11 @@ +from dassl.utils import Registry, check_availability + +BACKBONE_REGISTRY = Registry("BACKBONE") + + +def build_backbone(name, verbose=True, **kwargs): + avai_backbones = BACKBONE_REGISTRY.registered_names() + check_availability(name, avai_backbones) + if verbose: + print("Backbone: {}".format(name)) + return BACKBONE_REGISTRY.get(name)(**kwargs) diff --git a/Dassl.pytorch/dassl/modeling/backbone/cnn_digit5_m3sda.py b/Dassl.pytorch/dassl/modeling/backbone/cnn_digit5_m3sda.py new file mode 100644 index 0000000000000000000000000000000000000000..deabded8b26f3d214ad46b04e7f782aec4e86112 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/cnn_digit5_m3sda.py @@ -0,0 +1,58 @@ +""" +Reference + +https://github.com/VisionLearningGroup/VisionLearningGroup.github.io/tree/master/M3SDA +""" +import torch.nn as nn +from torch.nn import functional as F + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + + +class FeatureExtractor(Backbone): + + def __init__(self): + super().__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=5, stride=1, padding=2) + self.bn1 = nn.BatchNorm2d(64) + self.conv2 = nn.Conv2d(64, 64, kernel_size=5, stride=1, padding=2) + self.bn2 = nn.BatchNorm2d(64) + self.conv3 = nn.Conv2d(64, 128, kernel_size=5, stride=1, padding=2) + self.bn3 = nn.BatchNorm2d(128) + self.fc1 = nn.Linear(8192, 3072) + self.bn1_fc = nn.BatchNorm1d(3072) + self.fc2 = nn.Linear(3072, 2048) + self.bn2_fc = nn.BatchNorm1d(2048) + + self._out_features = 2048 + + def _check_input(self, x): + H, W = x.shape[2:] + assert ( + H == 32 and W == 32 + ), "Input to network must be 32x32, " "but got {}x{}".format(H, W) + + def forward(self, x): + self._check_input(x) + x = F.relu(self.bn1(self.conv1(x))) + x = F.max_pool2d(x, stride=2, kernel_size=3, padding=1) + x = F.relu(self.bn2(self.conv2(x))) + x = F.max_pool2d(x, stride=2, kernel_size=3, padding=1) + x = F.relu(self.bn3(self.conv3(x))) + x = x.view(x.size(0), 8192) + x = F.relu(self.bn1_fc(self.fc1(x))) + x = F.dropout(x, training=self.training) + x = F.relu(self.bn2_fc(self.fc2(x))) + return x + + +@BACKBONE_REGISTRY.register() +def cnn_digit5_m3sda(**kwargs): + """ + This architecture was used for the Digit-5 dataset in: + + - Peng et al. 
Moment Matching for Multi-Source + Domain Adaptation. ICCV 2019. + """ + return FeatureExtractor() diff --git a/Dassl.pytorch/dassl/modeling/backbone/cnn_digitsdg.py b/Dassl.pytorch/dassl/modeling/backbone/cnn_digitsdg.py new file mode 100644 index 0000000000000000000000000000000000000000..c68044f3fdca0bcaf2518e14d5f4f9ebcbe8f5c9 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/cnn_digitsdg.py @@ -0,0 +1,61 @@ +import torch.nn as nn +from torch.nn import functional as F + +from dassl.utils import init_network_weights + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + + +class Convolution(nn.Module): + + def __init__(self, c_in, c_out): + super().__init__() + self.conv = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1) + self.relu = nn.ReLU(True) + + def forward(self, x): + return self.relu(self.conv(x)) + + +class ConvNet(Backbone): + + def __init__(self, c_hidden=64): + super().__init__() + self.conv1 = Convolution(3, c_hidden) + self.conv2 = Convolution(c_hidden, c_hidden) + self.conv3 = Convolution(c_hidden, c_hidden) + self.conv4 = Convolution(c_hidden, c_hidden) + + self._out_features = 2**2 * c_hidden + + def _check_input(self, x): + H, W = x.shape[2:] + assert ( + H == 32 and W == 32 + ), "Input to network must be 32x32, " "but got {}x{}".format(H, W) + + def forward(self, x): + self._check_input(x) + x = self.conv1(x) + x = F.max_pool2d(x, 2) + x = self.conv2(x) + x = F.max_pool2d(x, 2) + x = self.conv3(x) + x = F.max_pool2d(x, 2) + x = self.conv4(x) + x = F.max_pool2d(x, 2) + return x.view(x.size(0), -1) + + +@BACKBONE_REGISTRY.register() +def cnn_digitsdg(**kwargs): + """ + This architecture was used for DigitsDG dataset in: + + - Zhou et al. Deep Domain-Adversarial Image Generation + for Domain Generalisation. AAAI 2020. 
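+
+    The network expects 32x32 RGB inputs; four conv + 2x2 max-pool stages
+    reduce the spatial size to 2x2, so the flattened feature returned by
+    forward() is 2 * 2 * 64 = 256-dimensional.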
+ """ + model = ConvNet(c_hidden=64) + init_network_weights(model, init_type="kaiming") + return model diff --git a/Dassl.pytorch/dassl/modeling/backbone/cnn_digitsingle.py b/Dassl.pytorch/dassl/modeling/backbone/cnn_digitsingle.py new file mode 100644 index 0000000000000000000000000000000000000000..0c5101cecdc7223fa5171c6f118a81fdf662db96 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/cnn_digitsingle.py @@ -0,0 +1,56 @@ +""" +This model is built based on +https://github.com/ricvolpi/generalize-unseen-domains/blob/master/model.py +""" +import torch.nn as nn +from torch.nn import functional as F + +from dassl.utils import init_network_weights + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + + +class CNN(Backbone): + + def __init__(self): + super().__init__() + self.conv1 = nn.Conv2d(3, 64, 5) + self.conv2 = nn.Conv2d(64, 128, 5) + self.fc3 = nn.Linear(5 * 5 * 128, 1024) + self.fc4 = nn.Linear(1024, 1024) + + self._out_features = 1024 + + def _check_input(self, x): + H, W = x.shape[2:] + assert ( + H == 32 and W == 32 + ), "Input to network must be 32x32, " "but got {}x{}".format(H, W) + + def forward(self, x): + self._check_input(x) + x = self.conv1(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + + x = self.conv2(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + + x = x.view(x.size(0), -1) + + x = self.fc3(x) + x = F.relu(x) + + x = self.fc4(x) + x = F.relu(x) + + return x + + +@BACKBONE_REGISTRY.register() +def cnn_digitsingle(**kwargs): + model = CNN() + init_network_weights(model, init_type="kaiming") + return model diff --git a/Dassl.pytorch/dassl/modeling/backbone/efficientnet/__init__.py b/Dassl.pytorch/dassl/modeling/backbone/efficientnet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..20ee4333e0d4d749e61ba359a6596385e38a92f2 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/efficientnet/__init__.py @@ -0,0 +1,12 @@ +""" +Source: https://github.com/lukemelas/EfficientNet-PyTorch. +""" +__version__ = "0.6.4" +from .model import ( + EfficientNet, efficientnet_b0, efficientnet_b1, efficientnet_b2, + efficientnet_b3, efficientnet_b4, efficientnet_b5, efficientnet_b6, + efficientnet_b7 +) +from .utils import ( + BlockArgs, BlockDecoder, GlobalParams, efficientnet, get_model_params +) diff --git a/Dassl.pytorch/dassl/modeling/backbone/efficientnet/model.py b/Dassl.pytorch/dassl/modeling/backbone/efficientnet/model.py new file mode 100644 index 0000000000000000000000000000000000000000..ed01261d5beba8f183a37e2bf5ee4dc8f9c1ab41 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/efficientnet/model.py @@ -0,0 +1,371 @@ +import torch +from torch import nn +from torch.nn import functional as F + +from .utils import ( + Swish, MemoryEfficientSwish, drop_connect, round_filters, round_repeats, + get_model_params, efficientnet_params, get_same_padding_conv2d, + load_pretrained_weights, calculate_output_image_size +) +from ..build import BACKBONE_REGISTRY +from ..backbone import Backbone + + +class MBConvBlock(nn.Module): + """ + Mobile Inverted Residual Bottleneck Block + + Args: + block_args (namedtuple): BlockArgs, see above + global_params (namedtuple): GlobalParam, see above + + Attributes: + has_se (bool): Whether the block contains a Squeeze and Excitation layer. 
+ """ + + def __init__(self, block_args, global_params, image_size=None): + super().__init__() + self._block_args = block_args + self._bn_mom = 1 - global_params.batch_norm_momentum + self._bn_eps = global_params.batch_norm_epsilon + self.has_se = (self._block_args.se_ratio is + not None) and (0 < self._block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip # skip connection and drop connect + + # Expansion phase + inp = self._block_args.input_filters # number of input channels + oup = ( + self._block_args.input_filters * self._block_args.expand_ratio + ) # number of output channels + if self._block_args.expand_ratio != 1: + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._expand_conv = Conv2d( + in_channels=inp, out_channels=oup, kernel_size=1, bias=False + ) + self._bn0 = nn.BatchNorm2d( + num_features=oup, momentum=self._bn_mom, eps=self._bn_eps + ) + # image_size = calculate_output_image_size(image_size, 1) <-- this would do nothing + + # Depthwise convolution phase + k = self._block_args.kernel_size + s = self._block_args.stride + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._depthwise_conv = Conv2d( + in_channels=oup, + out_channels=oup, + groups=oup, # groups makes it depthwise + kernel_size=k, + stride=s, + bias=False, + ) + self._bn1 = nn.BatchNorm2d( + num_features=oup, momentum=self._bn_mom, eps=self._bn_eps + ) + image_size = calculate_output_image_size(image_size, s) + + # Squeeze and Excitation layer, if desired + if self.has_se: + Conv2d = get_same_padding_conv2d(image_size=(1, 1)) + num_squeezed_channels = max( + 1, + int( + self._block_args.input_filters * self._block_args.se_ratio + ) + ) + self._se_reduce = Conv2d( + in_channels=oup, + out_channels=num_squeezed_channels, + kernel_size=1 + ) + self._se_expand = Conv2d( + in_channels=num_squeezed_channels, + out_channels=oup, + kernel_size=1 + ) + + # Output phase + final_oup = self._block_args.output_filters + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._project_conv = Conv2d( + in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False + ) + self._bn2 = nn.BatchNorm2d( + num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps + ) + self._swish = MemoryEfficientSwish() + + def forward(self, inputs, drop_connect_rate=None): + """ + :param inputs: input tensor + :param drop_connect_rate: drop connect rate (float, between 0 and 1) + :return: output of block + """ + + # Expansion and Depthwise Convolution + x = inputs + if self._block_args.expand_ratio != 1: + x = self._swish(self._bn0(self._expand_conv(inputs))) + x = self._swish(self._bn1(self._depthwise_conv(x))) + + # Squeeze and Excitation + if self.has_se: + x_squeezed = F.adaptive_avg_pool2d(x, 1) + x_squeezed = self._se_expand( + self._swish(self._se_reduce(x_squeezed)) + ) + x = torch.sigmoid(x_squeezed) * x + + x = self._bn2(self._project_conv(x)) + + # Skip connection and drop connect + input_filters, output_filters = ( + self._block_args.input_filters, + self._block_args.output_filters, + ) + if ( + self.id_skip and self._block_args.stride == 1 + and input_filters == output_filters + ): + if drop_connect_rate: + x = drop_connect( + x, p=drop_connect_rate, training=self.training + ) + x = x + inputs # skip connection + return x + + def set_swish(self, memory_efficient=True): + """Sets swish function as memory efficient (for training) or standard (for export)""" + self._swish = MemoryEfficientSwish() if memory_efficient else Swish() + + +class EfficientNet(Backbone): + """ + An EfficientNet 
model. Most easily loaded with the .from_name or .from_pretrained methods + + Args: + blocks_args (list): A list of BlockArgs to construct blocks + global_params (namedtuple): A set of GlobalParams shared between blocks + + Example: + model = EfficientNet.from_pretrained('efficientnet-b0') + + """ + + def __init__(self, blocks_args=None, global_params=None): + super().__init__() + assert isinstance(blocks_args, list), "blocks_args should be a list" + assert len(blocks_args) > 0, "block args must be greater than 0" + self._global_params = global_params + self._blocks_args = blocks_args + + # Batch norm parameters + bn_mom = 1 - self._global_params.batch_norm_momentum + bn_eps = self._global_params.batch_norm_epsilon + + # Get stem static or dynamic convolution depending on image size + image_size = global_params.image_size + Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) + + # Stem + in_channels = 3 # rgb + out_channels = round_filters( + 32, self._global_params + ) # number of output channels + self._conv_stem = Conv2d( + in_channels, out_channels, kernel_size=3, stride=2, bias=False + ) + self._bn0 = nn.BatchNorm2d( + num_features=out_channels, momentum=bn_mom, eps=bn_eps + ) + image_size = calculate_output_image_size(image_size, 2) + + # Build blocks + self._blocks = nn.ModuleList([]) + for block_args in self._blocks_args: + + # Update block input and output filters based on depth multiplier. + block_args = block_args._replace( + input_filters=round_filters( + block_args.input_filters, self._global_params + ), + output_filters=round_filters( + block_args.output_filters, self._global_params + ), + num_repeat=round_repeats( + block_args.num_repeat, self._global_params + ), + ) + + # The first block needs to take care of stride and filter size increase. + self._blocks.append( + MBConvBlock( + block_args, self._global_params, image_size=image_size + ) + ) + image_size = calculate_output_image_size( + image_size, block_args.stride + ) + if block_args.num_repeat > 1: + block_args = block_args._replace( + input_filters=block_args.output_filters, stride=1 + ) + for _ in range(block_args.num_repeat - 1): + self._blocks.append( + MBConvBlock( + block_args, self._global_params, image_size=image_size + ) + ) + # image_size = calculate_output_image_size(image_size, block_args.stride) # ? 
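+                # (the repeated blocks above use stride 1, so the image size is unchanged)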
+ + # Head + in_channels = block_args.output_filters # output of final block + out_channels = round_filters(1280, self._global_params) + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._conv_head = Conv2d( + in_channels, out_channels, kernel_size=1, bias=False + ) + self._bn1 = nn.BatchNorm2d( + num_features=out_channels, momentum=bn_mom, eps=bn_eps + ) + + # Final linear layer + self._avg_pooling = nn.AdaptiveAvgPool2d(1) + self._dropout = nn.Dropout(self._global_params.dropout_rate) + # self._fc = nn.Linear(out_channels, self._global_params.num_classes) + self._swish = MemoryEfficientSwish() + + self._out_features = out_channels + + def set_swish(self, memory_efficient=True): + """Sets swish function as memory efficient (for training) or standard (for export)""" + self._swish = MemoryEfficientSwish() if memory_efficient else Swish() + for block in self._blocks: + block.set_swish(memory_efficient) + + def extract_features(self, inputs): + """Returns output of the final convolution layer""" + + # Stem + x = self._swish(self._bn0(self._conv_stem(inputs))) + + # Blocks + for idx, block in enumerate(self._blocks): + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / len(self._blocks) + x = block(x, drop_connect_rate=drop_connect_rate) + + # Head + x = self._swish(self._bn1(self._conv_head(x))) + + return x + + def forward(self, inputs): + """ + Calls extract_features to extract features, applies + final linear layer, and returns logits. + """ + bs = inputs.size(0) + # Convolution layers + x = self.extract_features(inputs) + + # Pooling and final linear layer + x = self._avg_pooling(x) + x = x.view(bs, -1) + x = self._dropout(x) + # x = self._fc(x) + return x + + @classmethod + def from_name(cls, model_name, override_params=None): + cls._check_model_name_is_valid(model_name) + blocks_args, global_params = get_model_params( + model_name, override_params + ) + return cls(blocks_args, global_params) + + @classmethod + def from_pretrained( + cls, model_name, advprop=False, num_classes=1000, in_channels=3 + ): + model = cls.from_name( + model_name, override_params={"num_classes": num_classes} + ) + load_pretrained_weights( + model, model_name, load_fc=(num_classes == 1000), advprop=advprop + ) + model._change_in_channels(in_channels) + return model + + @classmethod + def get_image_size(cls, model_name): + cls._check_model_name_is_valid(model_name) + _, _, res, _ = efficientnet_params(model_name) + return res + + @classmethod + def _check_model_name_is_valid(cls, model_name): + """Validates model name.""" + valid_models = ["efficientnet-b" + str(i) for i in range(9)] + if model_name not in valid_models: + raise ValueError( + "model_name should be one of: " + ", ".join(valid_models) + ) + + def _change_in_channels(model, in_channels): + if in_channels != 3: + Conv2d = get_same_padding_conv2d( + image_size=model._global_params.image_size + ) + out_channels = round_filters(32, model._global_params) + model._conv_stem = Conv2d( + in_channels, out_channels, kernel_size=3, stride=2, bias=False + ) + + +def build_efficientnet(name, pretrained): + if pretrained: + return EfficientNet.from_pretrained("efficientnet-{}".format(name)) + else: + return EfficientNet.from_name("efficientnet-{}".format(name)) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b0(pretrained=True, **kwargs): + return build_efficientnet("b0", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b1(pretrained=True, **kwargs): + return 
build_efficientnet("b1", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b2(pretrained=True, **kwargs): + return build_efficientnet("b2", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b3(pretrained=True, **kwargs): + return build_efficientnet("b3", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b4(pretrained=True, **kwargs): + return build_efficientnet("b4", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b5(pretrained=True, **kwargs): + return build_efficientnet("b5", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b6(pretrained=True, **kwargs): + return build_efficientnet("b6", pretrained) + + +@BACKBONE_REGISTRY.register() +def efficientnet_b7(pretrained=True, **kwargs): + return build_efficientnet("b7", pretrained) diff --git a/Dassl.pytorch/dassl/modeling/backbone/efficientnet/utils.py b/Dassl.pytorch/dassl/modeling/backbone/efficientnet/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a4205061feb9e84209700511fe2e00cfc3499ea1 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/efficientnet/utils.py @@ -0,0 +1,477 @@ +""" +This file contains helper functions for building the model and for loading model parameters. +These helper functions are built to mirror those in the official TensorFlow implementation. +""" + +import re +import math +import collections +from functools import partial +import torch +from torch import nn +from torch.nn import functional as F +from torch.utils import model_zoo + +######################################################################## +############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ############### +######################################################################## + +# Parameters for the entire model (stem, all blocks, and head) +GlobalParams = collections.namedtuple( + "GlobalParams", + [ + "batch_norm_momentum", + "batch_norm_epsilon", + "dropout_rate", + "num_classes", + "width_coefficient", + "depth_coefficient", + "depth_divisor", + "min_depth", + "drop_connect_rate", + "image_size", + ], +) + +# Parameters for an individual model block +BlockArgs = collections.namedtuple( + "BlockArgs", + [ + "kernel_size", + "num_repeat", + "input_filters", + "output_filters", + "expand_ratio", + "id_skip", + "stride", + "se_ratio", + ], +) + +# Change namedtuple defaults +GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) + + +class SwishImplementation(torch.autograd.Function): + + @staticmethod + def forward(ctx, i): + result = i * torch.sigmoid(i) + ctx.save_for_backward(i) + return result + + @staticmethod + def backward(ctx, grad_output): + i = ctx.saved_variables[0] + sigmoid_i = torch.sigmoid(i) + return grad_output * (sigmoid_i * (1 + i * (1-sigmoid_i))) + + +class MemoryEfficientSwish(nn.Module): + + def forward(self, x): + return SwishImplementation.apply(x) + + +class Swish(nn.Module): + + def forward(self, x): + return x * torch.sigmoid(x) + + +def round_filters(filters, global_params): + """Calculate and round number of filters based on depth multiplier.""" + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor/2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + 
new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats, global_params): + """Round number of filters based on depth multiplier.""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +def drop_connect(inputs, p, training): + """Drop connect.""" + if not training: + return inputs + batch_size = inputs.shape[0] + keep_prob = 1 - p + random_tensor = keep_prob + random_tensor += torch.rand( + [batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device + ) + binary_tensor = torch.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + +def get_same_padding_conv2d(image_size=None): + """Chooses static padding if you have specified an image size, and dynamic padding otherwise. + Static padding is necessary for ONNX exporting of models.""" + if image_size is None: + return Conv2dDynamicSamePadding + else: + return partial(Conv2dStaticSamePadding, image_size=image_size) + + +def get_width_and_height_from_size(x): + """Obtains width and height from a int or tuple""" + if isinstance(x, int): + return x, x + if isinstance(x, list) or isinstance(x, tuple): + return x + else: + raise TypeError() + + +def calculate_output_image_size(input_image_size, stride): + """ + Calculates the output image size when using Conv2dSamePadding with a stride. + Necessary for static padding. Thanks to mannatsingh for pointing this out. + """ + if input_image_size is None: + return None + image_height, image_width = get_width_and_height_from_size( + input_image_size + ) + stride = stride if isinstance(stride, int) else stride[0] + image_height = int(math.ceil(image_height / stride)) + image_width = int(math.ceil(image_width / stride)) + return [image_height, image_width] + + +class Conv2dDynamicSamePadding(nn.Conv2d): + """2D Convolutions like TensorFlow, for a dynamic image size""" + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + dilation=1, + groups=1, + bias=True, + ): + super().__init__( + in_channels, out_channels, kernel_size, stride, 0, dilation, + groups, bias + ) + self.stride = self.stride if len(self.stride + ) == 2 else [self.stride[0]] * 2 + + def forward(self, x): + ih, iw = x.size()[-2:] + kh, kw = self.weight.size()[-2:] + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max( + (oh-1) * self.stride[0] + (kh-1) * self.dilation[0] + 1 - ih, 0 + ) + pad_w = max( + (ow-1) * self.stride[1] + (kw-1) * self.dilation[1] + 1 - iw, 0 + ) + if pad_h > 0 or pad_w > 0: + x = F.pad( + x, + [pad_w // 2, pad_w - pad_w//2, pad_h // 2, pad_h - pad_h//2] + ) + return F.conv2d( + x, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + ) + + +class Conv2dStaticSamePadding(nn.Conv2d): + """2D Convolutions like TensorFlow, for a fixed image size""" + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + image_size=None, + **kwargs + ): + super().__init__(in_channels, out_channels, kernel_size, **kwargs) + self.stride = self.stride if len(self.stride + ) == 2 else [self.stride[0]] * 2 + + # Calculate padding based on image size and save it + assert image_size is not None + ih, iw = (image_size, + image_size) if isinstance(image_size, int) else image_size + kh, kw = self.weight.size()[-2:] + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max( + (oh-1) * self.stride[0] + (kh-1) * self.dilation[0] + 1 - ih, 0 + ) + pad_w = max( + 
(ow-1) * self.stride[1] + (kw-1) * self.dilation[1] + 1 - iw, 0 + ) + if pad_h > 0 or pad_w > 0: + self.static_padding = nn.ZeroPad2d( + (pad_w // 2, pad_w - pad_w//2, pad_h // 2, pad_h - pad_h//2) + ) + else: + self.static_padding = Identity() + + def forward(self, x): + x = self.static_padding(x) + x = F.conv2d( + x, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + ) + return x + + +class Identity(nn.Module): + + def __init__(self, ): + super(Identity, self).__init__() + + def forward(self, input): + return input + + +######################################################################## +############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ############## +######################################################################## + + +def efficientnet_params(model_name): + """Map EfficientNet model name to parameter coefficients.""" + params_dict = { + # Coefficients: width,depth,res,dropout + "efficientnet-b0": (1.0, 1.0, 224, 0.2), + "efficientnet-b1": (1.0, 1.1, 240, 0.2), + "efficientnet-b2": (1.1, 1.2, 260, 0.3), + "efficientnet-b3": (1.2, 1.4, 300, 0.3), + "efficientnet-b4": (1.4, 1.8, 380, 0.4), + "efficientnet-b5": (1.6, 2.2, 456, 0.4), + "efficientnet-b6": (1.8, 2.6, 528, 0.5), + "efficientnet-b7": (2.0, 3.1, 600, 0.5), + "efficientnet-b8": (2.2, 3.6, 672, 0.5), + "efficientnet-l2": (4.3, 5.3, 800, 0.5), + } + return params_dict[model_name] + + +class BlockDecoder(object): + """Block Decoder for readability, straight from the official TensorFlow repository""" + + @staticmethod + def _decode_block_string(block_string): + """Gets a block through a string notation of arguments.""" + assert isinstance(block_string, str) + + ops = block_string.split("_") + options = {} + for op in ops: + splits = re.split(r"(\d.*)", op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + assert ("s" in options and len(options["s"]) == 1) or ( + len(options["s"]) == 2 and options["s"][0] == options["s"][1] + ) + + return BlockArgs( + kernel_size=int(options["k"]), + num_repeat=int(options["r"]), + input_filters=int(options["i"]), + output_filters=int(options["o"]), + expand_ratio=int(options["e"]), + id_skip=("noskip" not in block_string), + se_ratio=float(options["se"]) if "se" in options else None, + stride=[int(options["s"][0])], + ) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + "r%d" % block.num_repeat, + "k%d" % block.kernel_size, + "s%d%d" % (block.strides[0], block.strides[1]), + "e%s" % block.expand_ratio, + "i%d" % block.input_filters, + "o%d" % block.output_filters, + ] + if 0 < block.se_ratio <= 1: + args.append("se%s" % block.se_ratio) + if block.id_skip is False: + args.append("noskip") + return "_".join(args) + + @staticmethod + def decode(string_list): + """ + Decodes a list of string notations to specify blocks inside the network. + + :param string_list: a list of strings, each string is a notation of block + :return: a list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args): + """ + Encodes a list of BlockArgs to a list of strings. 
+ + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def efficientnet( + width_coefficient=None, + depth_coefficient=None, + dropout_rate=0.2, + drop_connect_rate=0.2, + image_size=None, + num_classes=1000, +): + """Creates a efficientnet model.""" + + blocks_args = [ + "r1_k3_s11_e1_i32_o16_se0.25", + "r2_k3_s22_e6_i16_o24_se0.25", + "r2_k5_s22_e6_i24_o40_se0.25", + "r3_k3_s22_e6_i40_o80_se0.25", + "r3_k5_s11_e6_i80_o112_se0.25", + "r4_k5_s22_e6_i112_o192_se0.25", + "r1_k3_s11_e6_i192_o320_se0.25", + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + # data_format='channels_last', # removed, this is always true in PyTorch + num_classes=num_classes, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None, + image_size=image_size, + ) + + return blocks_args, global_params + + +def get_model_params(model_name, override_params): + """Get the block args and global params for a given model""" + if model_name.startswith("efficientnet"): + w, d, s, p = efficientnet_params(model_name) + # note: all models have drop connect rate = 0.2 + blocks_args, global_params = efficientnet( + width_coefficient=w, + depth_coefficient=d, + dropout_rate=p, + image_size=s + ) + else: + raise NotImplementedError( + "model name is not pre-defined: %s" % model_name + ) + if override_params: + # ValueError will be raised here if override_params has fields not included in global_params. 
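+        # Illustrative example (hypothetical values, not from the original
+        # code): passing override_params={"num_classes": 10} replaces the
+        # default 1000-way classifier setting carried in global_params.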
+ global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +url_map = { + "efficientnet-b0": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth", + "efficientnet-b1": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth", + "efficientnet-b2": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth", + "efficientnet-b3": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth", + "efficientnet-b4": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth", + "efficientnet-b5": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth", + "efficientnet-b6": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth", + "efficientnet-b7": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth", +} + +url_map_advprop = { + "efficientnet-b0": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth", + "efficientnet-b1": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth", + "efficientnet-b2": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth", + "efficientnet-b3": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth", + "efficientnet-b4": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth", + "efficientnet-b5": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth", + "efficientnet-b6": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth", + "efficientnet-b7": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth", + "efficientnet-b8": + "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth", +} + + +def load_pretrained_weights(model, model_name, load_fc=True, advprop=False): + """Loads pretrained weights, and downloads if loading for the first time.""" + # AutoAugment or Advprop (different preprocessing) + url_map_ = url_map_advprop if advprop else url_map + state_dict = model_zoo.load_url(url_map_[model_name]) + model.load_state_dict(state_dict, strict=False) + """ + if load_fc: + model.load_state_dict(state_dict) + else: + state_dict.pop('_fc.weight') + state_dict.pop('_fc.bias') + res = model.load_state_dict(state_dict, strict=False) + assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights' + + print('Loaded pretrained weights for {}'.format(model_name)) + """ diff --git a/Dassl.pytorch/dassl/modeling/backbone/preact_resnet18.py b/Dassl.pytorch/dassl/modeling/backbone/preact_resnet18.py new file mode 100644 index 0000000000000000000000000000000000000000..8c0708993e1d0c3f9c511d55c8ab75bd8a9ac6a7 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/preact_resnet18.py @@ -0,0 +1,135 @@ +import torch.nn as nn +import torch.nn.functional as F + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + + +class 
PreActBlock(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, stride=1): + super().__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d( + in_planes, + planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False + ) + self.bn2 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=1, padding=1, bias=False + ) + + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False, + ) + ) + + def forward(self, x): + out = F.relu(self.bn1(x)) + shortcut = self.shortcut(out) if hasattr(self, "shortcut") else x + out = self.conv1(out) + out = self.conv2(F.relu(self.bn2(out))) + out += shortcut + return out + + +class PreActBottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_planes, planes, stride=1): + super().__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, + planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False + ) + self.bn3 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, self.expansion * planes, kernel_size=1, bias=False + ) + + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False, + ) + ) + + def forward(self, x): + out = F.relu(self.bn1(x)) + shortcut = self.shortcut(out) if hasattr(self, "shortcut") else x + out = self.conv1(out) + out = self.conv2(F.relu(self.bn2(out))) + out = self.conv3(F.relu(self.bn3(out))) + out += shortcut + return out + + +class PreActResNet(Backbone): + + def __init__(self, block, num_blocks): + super().__init__() + self.in_planes = 64 + + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=3, stride=1, padding=1, bias=False + ) + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) + self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) + + self._out_features = 512 * block.expansion + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1] * (num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + out = self.conv1(x) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + return out + + +""" +Preact-ResNet18 was used for the CIFAR10 and +SVHN datasets (both are SSL tasks) in + +- Wang et al. Semi-Supervised Learning by +Augmented Distribution Alignment. ICCV 2019. 
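+
+Illustrative usage (assuming the registry API used elsewhere in Dassl):
+BACKBONE_REGISTRY.get("preact_resnet18")() returns a PreActResNet built from
+[2, 2, 2, 2] PreActBlocks with a 512-dimensional output feature.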
+""" + + +@BACKBONE_REGISTRY.register() +def preact_resnet18(**kwargs): + return PreActResNet(PreActBlock, [2, 2, 2, 2]) diff --git a/Dassl.pytorch/dassl/modeling/backbone/resnet.py b/Dassl.pytorch/dassl/modeling/backbone/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..60b9a8c874b63e3222a805c96fd5cdffa904063f --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/resnet.py @@ -0,0 +1,591 @@ +import torch.nn as nn +import torch.utils.model_zoo as model_zoo + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + +model_urls = { + "resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth", + "resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth", + "resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth", + "resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth", + "resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth", +} + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False + ) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super().__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super().__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, + planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False + ) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False + ) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(Backbone): + + def __init__( + self, + block, + layers, + ms_class=None, + ms_layers=[], + ms_p=0.5, + ms_a=0.1, + **kwargs + ): + self.inplanes = 64 + super().__init__() + + # backbone network + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=7, stride=2, padding=3, bias=False + ) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + 
+ self._out_features = 512 * block.expansion + + self.mixstyle = None + if ms_layers: + self.mixstyle = ms_class(p=ms_p, alpha=ms_a) + for layer_name in ms_layers: + assert layer_name in ["layer1", "layer2", "layer3"] + print( + f"Insert {self.mixstyle.__class__.__name__} after {ms_layers}" + ) + self.ms_layers = ms_layers + + self._init_params() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + self.inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def _init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def featuremaps(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + x = self.layer1(x) + if "layer1" in self.ms_layers: + x = self.mixstyle(x) + x = self.layer2(x) + if "layer2" in self.ms_layers: + x = self.mixstyle(x) + x = self.layer3(x) + if "layer3" in self.ms_layers: + x = self.mixstyle(x) + return self.layer4(x) + + def forward(self, x): + f = self.featuremaps(x) + v = self.global_avgpool(f) + return v.view(v.size(0), -1) + + +def init_pretrained_weights(model, model_url): + pretrain_dict = model_zoo.load_url(model_url) + model.load_state_dict(pretrain_dict, strict=False) + + +""" +Residual network configurations: +-- +resnet18: block=BasicBlock, layers=[2, 2, 2, 2] +resnet34: block=BasicBlock, layers=[3, 4, 6, 3] +resnet50: block=Bottleneck, layers=[3, 4, 6, 3] +resnet101: block=Bottleneck, layers=[3, 4, 23, 3] +resnet152: block=Bottleneck, layers=[3, 8, 36, 3] +""" + + +@BACKBONE_REGISTRY.register() +def resnet18(pretrained=True, **kwargs): + model = ResNet(block=BasicBlock, layers=[2, 2, 2, 2]) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet34(pretrained=True, **kwargs): + model = ResNet(block=BasicBlock, layers=[3, 4, 6, 3]) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet34"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50(pretrained=True, **kwargs): + model = ResNet(block=Bottleneck, layers=[3, 4, 6, 3]) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101(pretrained=True, **kwargs): + model = ResNet(block=Bottleneck, layers=[3, 4, 23, 3]) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet152(pretrained=True, **kwargs): + model = ResNet(block=Bottleneck, layers=[3, 8, 36, 3]) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet152"]) + + return model + + +""" +Residual 
networks with mixstyle +""" + + +@BACKBONE_REGISTRY.register() +def resnet18_ms_l123(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=BasicBlock, + layers=[2, 2, 2, 2], + ms_class=MixStyle, + ms_layers=["layer1", "layer2", "layer3"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_ms_l12(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=BasicBlock, + layers=[2, 2, 2, 2], + ms_class=MixStyle, + ms_layers=["layer1", "layer2"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_ms_l1(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=BasicBlock, + layers=[2, 2, 2, 2], + ms_class=MixStyle, + ms_layers=["layer1"] + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_ms_l123(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 6, 3], + ms_class=MixStyle, + ms_layers=["layer1", "layer2", "layer3"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_ms_l12(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 6, 3], + ms_class=MixStyle, + ms_layers=["layer1", "layer2"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_ms_l1(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 6, 3], + ms_class=MixStyle, + ms_layers=["layer1"] + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_ms_l123(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 23, 3], + ms_class=MixStyle, + ms_layers=["layer1", "layer2", "layer3"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_ms_l12(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 23, 3], + ms_class=MixStyle, + ms_layers=["layer1", "layer2"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_ms_l1(pretrained=True, **kwargs): + from dassl.modeling.ops import MixStyle + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 23, 3], + ms_class=MixStyle, + ms_layers=["layer1"] + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model + + +""" +Residual networks with efdmix +""" + + +@BACKBONE_REGISTRY.register() +def resnet18_efdmix_l123(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=BasicBlock, + layers=[2, 2, 2, 2], + ms_class=EFDMix, + ms_layers=["layer1", "layer2", "layer3"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def 
resnet18_efdmix_l12(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=BasicBlock, + layers=[2, 2, 2, 2], + ms_class=EFDMix, + ms_layers=["layer1", "layer2"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_efdmix_l1(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=BasicBlock, + layers=[2, 2, 2, 2], + ms_class=EFDMix, + ms_layers=["layer1"] + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet18"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_efdmix_l123(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 6, 3], + ms_class=EFDMix, + ms_layers=["layer1", "layer2", "layer3"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_efdmix_l12(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 6, 3], + ms_class=EFDMix, + ms_layers=["layer1", "layer2"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_efdmix_l1(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 6, 3], + ms_class=EFDMix, + ms_layers=["layer1"] + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet50"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_efdmix_l123(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 23, 3], + ms_class=EFDMix, + ms_layers=["layer1", "layer2", "layer3"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_efdmix_l12(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 23, 3], + ms_class=EFDMix, + ms_layers=["layer1", "layer2"], + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_efdmix_l1(pretrained=True, **kwargs): + from dassl.modeling.ops import EFDMix + + model = ResNet( + block=Bottleneck, + layers=[3, 4, 23, 3], + ms_class=EFDMix, + ms_layers=["layer1"] + ) + + if pretrained: + init_pretrained_weights(model, model_urls["resnet101"]) + + return model diff --git a/Dassl.pytorch/dassl/modeling/backbone/resnet_dynamic.py b/Dassl.pytorch/dassl/modeling/backbone/resnet_dynamic.py new file mode 100644 index 0000000000000000000000000000000000000000..c4e08dedf04888a51a74bb3a1bf983dc95ec3e17 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/resnet_dynamic.py @@ -0,0 +1,734 @@ +""" +Dynamic ResNet from `"Dynamic Domain Generalization" `_. 
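+
+Compared with the plain ResNet backbones, the *_dynamic variants defined here
+replace the 3x3 convolutions with Conv2dDynamic (dassl.modeling.ops), which
+adds the outputs of several kernel templates weighted by an input-conditioned
+Attention module.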
+""" + +from typing import Any, List, Type, Union, Callable, Optional +from collections import OrderedDict +import torch +import torch.nn as nn +from torch import Tensor +from torch.hub import load_state_dict_from_url + +from dassl.modeling.ops import MixStyle, Conv2dDynamic + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + +__all__ = [ + "resnet18_dynamic", "resnet50_dynamic", "resnet101_dynamic", + "resnet18_dynamic_ms_l123", "resnet18_dynamic_ms_l12", + "resnet18_dynamic_ms_l1", "resnet50_dynamic_ms_l123", + "resnet50_dynamic_ms_l12", "resnet50_dynamic_ms_l1", + "resnet101_dynamic_ms_l123", "resnet101_dynamic_ms_l12", + "resnet101_dynamic_ms_l1" +] + +model_urls = { + "resnet18_dynamic": + "https://csip.fzu.edu.cn/files/models/resnet18_dynamic-074db766.pth", + "resnet50_dynamic": + "https://csip.fzu.edu.cn/files/models/resnet50_dynamic-2c3b0201.pth", + "resnet101_dynamic": + "https://csip.fzu.edu.cn/files/models/resnet101_dynamic-c5f15780.pth", +} + + +def conv3x3( + in_planes: int, + out_planes: int, + stride: int = 1, + groups: int = 1, + dilation: int = 1 +) -> nn.Conv2d: + """3x3 convolution with padding""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + groups=groups, + bias=False, + dilation=dilation + ) + + +def conv3x3_dynamic( + in_planes: int, + out_planes: int, + stride: int = 1, + attention_in_channels: int = None +) -> Conv2dDynamic: + """3x3 convolution with padding""" + return Conv2dDynamic( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + attention_in_channels=attention_in_channels + ) + + +def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d: + """1x1 convolution""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=1, stride=stride, bias=False + ) + + +def load_state_dict( + model: nn.Module, + state_dict: "OrderedDict[str, Tensor]", + allowed_missing_keys: List = None +): + r"""Copies parameters and buffers from :attr:`state_dict` into + this module and its descendants. If :attr:`strict` is ``True``, then + the keys of :attr:`state_dict` must exactly match the keys returned + by this module's :meth:`~torch.nn.Module.state_dict` function. + + Args: + model (torch.nn.Module): a torch.nn.Module object where state_dict load for. + state_dict (dict): a dict containing parameters and + persistent buffers. + allowed_missing_keys (List, optional): not raise `RuntimeError` if missing_keys + equal to allowed_missing_keys. + + Returns: + ``NamedTuple`` with ``missing_keys`` and ``unexpected_keys`` fields: + * **missing_keys** is a list of str containing the missing keys + * **unexpected_keys** is a list of str containing the unexpected keys + + Note: + If a parameter or buffer is registered as ``None`` and its corresponding key + exists in :attr:`state_dict`, :meth:`load_state_dict` will raise a + ``RuntimeError``. + """ + missing_keys, unexpected_keys = model.load_state_dict( + state_dict, strict=allowed_missing_keys is None + ) + + msgs: List[str] = [] + raise_error = False + if len(unexpected_keys) > 0: + raise_error = True + msgs.insert( + 0, "Unexpected key(s) in state_dict: {}. ".format( + ", ".join("'{}'".format(k) for k in unexpected_keys) + ) + ) + if len(missing_keys) > 0: + if allowed_missing_keys is None or sorted(missing_keys) != sorted( + allowed_missing_keys + ): + raise_error = True + msgs.insert( + 0, "Missing key(s) in state_dict: {}. 
".format( + ", ".join("'{}'".format(k) for k in missing_keys) + ) + ) + if raise_error: + raise RuntimeError( + "Error(s) in loading state_dict for {}:\n\t{}".format( + model.__class__.__name__, "\n\t".join(msgs) + ) + ) + if len(msgs) > 0: + print( + "\nInfo(s) in loading state_dict for {}:\n\t{}".format( + model.__class__.__name__, "\n\t".join(msgs) + ) + ) + + +class BasicBlock(nn.Module): + expansion: int = 1 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError( + "BasicBlock only supports groups=1 and base_width=64" + ) + if dilation > 1: + raise NotImplementedError( + "Dilation > 1 not supported in BasicBlock" + ) + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 
+ + expansion: int = 4 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width/64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class BasicBlockDynamic(nn.Module): + expansion: int = 1 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(BasicBlockDynamic, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError( + "BasicBlock only supports groups=1 and base_width=64" + ) + if dilation > 1: + raise NotImplementedError( + "Dilation > 1 not supported in BasicBlock" + ) + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3_dynamic( + inplanes, planes, stride, attention_in_channels=inplanes + ) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3_dynamic( + planes, planes, attention_in_channels=inplanes + ) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x, attention_x=x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out, attention_x=x) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class BottleneckDynamic(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 
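+    # Unlike the plain Bottleneck above, conv2 here is a Conv2dDynamic whose
+    # attention weights are computed from the block input (forward passes
+    # attention_x=x to self.conv2).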
+ + expansion: int = 4 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(BottleneckDynamic, self).__init__() + if groups != 1: + raise ValueError("BottleneckDynamic only supports groups=1") + if dilation > 1: + raise NotImplementedError( + "Dilation > 1 not supported in BottleneckDynamic" + ) + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width/64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3_dynamic( + width, width, stride, attention_in_channels=inplanes + ) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out, attention_x=x) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(Backbone): + + def __init__( + self, + block: Type[Union[BasicBlock, Bottleneck, BasicBlockDynamic, + BottleneckDynamic]], + layers: List[int], + has_fc: bool = True, + num_classes: int = 1000, + zero_init_residual: bool = False, + groups: int = 1, + width_per_group: int = 64, + replace_stride_with_dilation: Optional[List[bool]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + ms_class=None, + ms_layers=None, + ms_p=0.5, + ms_a=0.1 + ) -> None: + super(ResNet, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError( + "replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}". 
+ format(replace_stride_with_dilation) + ) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d( + 3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False + ) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer( + block, + 128, + layers[1], + stride=2, + dilate=replace_stride_with_dilation[0] + ) + self.layer3 = self._make_layer( + block, + 256, + layers[2], + stride=2, + dilate=replace_stride_with_dilation[1] + ) + self.layer4 = self._make_layer( + block, + 512, + layers[3], + stride=2, + dilate=replace_stride_with_dilation[2] + ) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + + self.has_fc = has_fc + self._out_features = 512 * block.expansion + if has_fc: + self.fc = nn.Linear(self.out_features, num_classes) + self._out_features = num_classes + + if ms_class is not None and ms_layers is not None: + self.ms_class = ms_class(p=ms_p, alpha=ms_a) + for layer in ms_layers: + assert layer in ["layer1", "layer2", "layer3"] + self.ms_layers = ms_layers + else: + self.ms_class = None + self.ms_layers = [] + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. + # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer( + self, + block: Type[Union[BasicBlock, Bottleneck]], + planes: int, + blocks: int, + stride: int = 1, + dilate: bool = False + ) -> nn.Sequential: + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + norm_layer(planes * block.expansion), + ) + + layers = [] + layers.append( + block( + self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation, norm_layer + ) + ) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block( + self.inplanes, + planes, + groups=self.groups, + base_width=self.base_width, + dilation=self.dilation, + norm_layer=norm_layer + ) + ) + + return nn.Sequential(*layers) + + def _forward_impl(self, x: Tensor) -> Tensor: + # See note [TorchScript super()] + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + if "layer1" in self.ms_layers: + x = self.ms_class(x) + x = self.layer2(x) + if "layer2" in self.ms_layers: + x = self.ms_class(x) + x = self.layer3(x) + if "layer3" in self.ms_layers: + x = self.ms_class(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + if self.has_fc: + x = self.fc(x) + + return x + + def forward(self, x: Tensor) -> Tensor: + return self._forward_impl(x) + + +def _resnet( + arch: str, block: Type[Union[BasicBlock, Bottleneck, BasicBlockDynamic, + 
BottleneckDynamic]], layers: List[int], + pretrained: bool, progress: bool, **kwargs: Any +) -> ResNet: + model = ResNet(block, layers, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url( + model_urls[arch], progress=progress + ) + # remove useless keys from sate_dict 1. no fc; 2. out_features != 1000. + removed_keys = model.has_fc is False or ( + model.has_fc is True and model.out_features != 1000 + ) + removed_keys = ["fc.weight", "fc.bias"] if removed_keys else [] + for key in removed_keys: + state_dict.pop(key) + # if has fc, then allow missing key, else strict load state_dict. + allowed_missing_keys = removed_keys if model.has_fc else None + load_state_dict(model, state_dict, allowed_missing_keys) + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_dynamic(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet18_dynamic", + BasicBlockDynamic, [2, 2, 2, 2], + pretrained=pretrained, + progress=True, + has_fc=False + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_dynamic(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet50_dynamic", + BottleneckDynamic, [3, 4, 6, 3], + pretrained=pretrained, + progress=True, + has_fc=False + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_dynamic(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet101_dynamic", + BottleneckDynamic, [3, 4, 23, 3], + pretrained=pretrained, + progress=True, + has_fc=False + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_dynamic_ms_l123(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet18_dynamic", + BasicBlockDynamic, [2, 2, 2, 2], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1", "layer2", "layer3"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_dynamic_ms_l12(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet18_dynamic", + BasicBlockDynamic, [2, 2, 2, 2], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1", "layer2"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet18_dynamic_ms_l1(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet18_dynamic", + BasicBlockDynamic, [2, 2, 2, 2], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_dynamic_ms_l123(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet50_dynamic", + BottleneckDynamic, [3, 4, 6, 3], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1", "layer2", "layer3"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_dynamic_ms_l12(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet50_dynamic", + BottleneckDynamic, [3, 4, 6, 3], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1", "layer2"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet50_dynamic_ms_l1(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet50_dynamic", + BottleneckDynamic, [3, 4, 6, 3], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_dynamic_ms_l123(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet101_dynamic", + BottleneckDynamic, [3, 4, 23, 3], + pretrained=pretrained, + 
progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1", "layer2", "layer3"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_dynamic_ms_l12(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet101_dynamic", + BottleneckDynamic, [3, 4, 23, 3], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1", "layer2"] + ) + return model + + +@BACKBONE_REGISTRY.register() +def resnet101_dynamic_ms_l1(pretrained=True, **kwargs) -> ResNet: + model = _resnet( + "resnet101_dynamic", + BottleneckDynamic, [3, 4, 23, 3], + pretrained=pretrained, + progress=True, + has_fc=False, + ms_class=MixStyle, + ms_layers=["layer1"] + ) + return model diff --git a/Dassl.pytorch/dassl/modeling/backbone/vgg.py b/Dassl.pytorch/dassl/modeling/backbone/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..3f91491aa20870b47cea0a3b4e4bd73517273c8d --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/vgg.py @@ -0,0 +1,147 @@ +import torch +import torch.nn as nn + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + +try: + from torch.hub import load_state_dict_from_url +except ImportError: + from torch.utils.model_zoo import load_url as load_state_dict_from_url + +model_urls = { + "vgg11": "https://download.pytorch.org/models/vgg11-bbd30ac9.pth", + "vgg13": "https://download.pytorch.org/models/vgg13-c768596a.pth", + "vgg16": "https://download.pytorch.org/models/vgg16-397923af.pth", + "vgg19": "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth", + "vgg11_bn": "https://download.pytorch.org/models/vgg11_bn-6002323d.pth", + "vgg13_bn": "https://download.pytorch.org/models/vgg13_bn-abd245e5.pth", + "vgg16_bn": "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth", + "vgg19_bn": "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth", +} + + +class VGG(Backbone): + + def __init__(self, features, init_weights=True): + super().__init__() + self.features = features + self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) + # Note that self.classifier outputs features rather than logits + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(), + ) + + self._out_features = 4096 + + if init_weights: + self._initialize_weights() + + def forward(self, x): + x = self.features(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + return self.classifier(x) + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.constant_(m.bias, 0) + + +def make_layers(cfg, batch_norm=False): + layers = [] + in_channels = 3 + for v in cfg: + if v == "M": + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + else: + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) + if batch_norm: + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] + else: + layers += [conv2d, nn.ReLU(inplace=True)] + in_channels = v + return nn.Sequential(*layers) + + +cfgs = { + "A": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"], + "B": + [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"], + "D": [ + 64, + 64, + "M", + 128, + 128, + "M", + 
256, + 256, + 256, + "M", + 512, + 512, + 512, + "M", + 512, + 512, + 512, + "M", + ], + "E": [ + 64, + 64, + "M", + 128, + 128, + "M", + 256, + 256, + 256, + 256, + "M", + 512, + 512, + 512, + 512, + "M", + 512, + 512, + 512, + 512, + "M", + ], +} + + +def _vgg(arch, cfg, batch_norm, pretrained): + init_weights = False if pretrained else True + model = VGG( + make_layers(cfgs[cfg], batch_norm=batch_norm), + init_weights=init_weights + ) + if pretrained: + state_dict = load_state_dict_from_url(model_urls[arch], progress=True) + model.load_state_dict(state_dict, strict=False) + return model + + +@BACKBONE_REGISTRY.register() +def vgg16(pretrained=True, **kwargs): + return _vgg("vgg16", "D", False, pretrained) diff --git a/Dassl.pytorch/dassl/modeling/backbone/wide_resnet.py b/Dassl.pytorch/dassl/modeling/backbone/wide_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..88ea949dc01fe112a000ca4ac8fab1b495ce086d --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/backbone/wide_resnet.py @@ -0,0 +1,150 @@ +""" +Modified from https://github.com/xternalz/WideResNet-pytorch +""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .build import BACKBONE_REGISTRY +from .backbone import Backbone + + +class BasicBlock(nn.Module): + + def __init__(self, in_planes, out_planes, stride, dropRate=0.0): + super().__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.relu1 = nn.LeakyReLU(0.01, inplace=True) + self.conv1 = nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False + ) + self.bn2 = nn.BatchNorm2d(out_planes) + self.relu2 = nn.LeakyReLU(0.01, inplace=True) + self.conv2 = nn.Conv2d( + out_planes, + out_planes, + kernel_size=3, + stride=1, + padding=1, + bias=False + ) + self.droprate = dropRate + self.equalInOut = in_planes == out_planes + self.convShortcut = ( + (not self.equalInOut) and nn.Conv2d( + in_planes, + out_planes, + kernel_size=1, + stride=stride, + padding=0, + bias=False, + ) or None + ) + + def forward(self, x): + if not self.equalInOut: + x = self.relu1(self.bn1(x)) + else: + out = self.relu1(self.bn1(x)) + out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x))) + if self.droprate > 0: + out = F.dropout(out, p=self.droprate, training=self.training) + out = self.conv2(out) + return torch.add(x if self.equalInOut else self.convShortcut(x), out) + + +class NetworkBlock(nn.Module): + + def __init__( + self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0 + ): + super().__init__() + self.layer = self._make_layer( + block, in_planes, out_planes, nb_layers, stride, dropRate + ) + + def _make_layer( + self, block, in_planes, out_planes, nb_layers, stride, dropRate + ): + layers = [] + for i in range(int(nb_layers)): + layers.append( + block( + i == 0 and in_planes or out_planes, + out_planes, + i == 0 and stride or 1, + dropRate, + ) + ) + return nn.Sequential(*layers) + + def forward(self, x): + return self.layer(x) + + +class WideResNet(Backbone): + + def __init__(self, depth, widen_factor, dropRate=0.0): + super().__init__() + nChannels = [ + 16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor + ] + assert (depth-4) % 6 == 0 + n = (depth-4) / 6 + block = BasicBlock + # 1st conv before any network block + self.conv1 = nn.Conv2d( + 3, nChannels[0], kernel_size=3, stride=1, padding=1, bias=False + ) + # 1st block + self.block1 = NetworkBlock( + n, nChannels[0], nChannels[1], block, 1, dropRate + ) + # 2nd block + self.block2 = NetworkBlock( + n, nChannels[1], 
nChannels[2], block, 2, dropRate + ) + # 3rd block + self.block3 = NetworkBlock( + n, nChannels[2], nChannels[3], block, 2, dropRate + ) + # global average pooling and classifier + self.bn1 = nn.BatchNorm2d(nChannels[3]) + self.relu = nn.LeakyReLU(0.01, inplace=True) + + self._out_features = nChannels[3] + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + m.bias.data.zero_() + + def forward(self, x): + out = self.conv1(x) + out = self.block1(out) + out = self.block2(out) + out = self.block3(out) + out = self.relu(self.bn1(out)) + out = F.adaptive_avg_pool2d(out, 1) + return out.view(out.size(0), -1) + + +@BACKBONE_REGISTRY.register() +def wide_resnet_28_2(**kwargs): + return WideResNet(28, 2) + + +@BACKBONE_REGISTRY.register() +def wide_resnet_16_4(**kwargs): + return WideResNet(16, 4) diff --git a/Dassl.pytorch/dassl/modeling/head/__init__.py b/Dassl.pytorch/dassl/modeling/head/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e76fb8ccfda3ca0606f53df48f8573ef61513a51 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/head/__init__.py @@ -0,0 +1,3 @@ +from .build import build_head, HEAD_REGISTRY # isort:skip + +from .mlp import mlp diff --git a/Dassl.pytorch/dassl/modeling/head/build.py b/Dassl.pytorch/dassl/modeling/head/build.py new file mode 100644 index 0000000000000000000000000000000000000000..730437b62aea1d561711aafa1d76ef066476054d --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/head/build.py @@ -0,0 +1,11 @@ +from dassl.utils import Registry, check_availability + +HEAD_REGISTRY = Registry("HEAD") + + +def build_head(name, verbose=True, **kwargs): + avai_heads = HEAD_REGISTRY.registered_names() + check_availability(name, avai_heads) + if verbose: + print("Head: {}".format(name)) + return HEAD_REGISTRY.get(name)(**kwargs) diff --git a/Dassl.pytorch/dassl/modeling/head/mlp.py b/Dassl.pytorch/dassl/modeling/head/mlp.py new file mode 100644 index 0000000000000000000000000000000000000000..89aae50eb86b201daaad10aa99aa2384135c1798 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/head/mlp.py @@ -0,0 +1,50 @@ +import functools +import torch.nn as nn + +from .build import HEAD_REGISTRY + + +class MLP(nn.Module): + + def __init__( + self, + in_features=2048, + hidden_layers=[], + activation="relu", + bn=True, + dropout=0.0, + ): + super().__init__() + if isinstance(hidden_layers, int): + hidden_layers = [hidden_layers] + + assert len(hidden_layers) > 0 + self.out_features = hidden_layers[-1] + + mlp = [] + + if activation == "relu": + act_fn = functools.partial(nn.ReLU, inplace=True) + elif activation == "leaky_relu": + act_fn = functools.partial(nn.LeakyReLU, inplace=True) + else: + raise NotImplementedError + + for hidden_dim in hidden_layers: + mlp += [nn.Linear(in_features, hidden_dim)] + if bn: + mlp += [nn.BatchNorm1d(hidden_dim)] + mlp += [act_fn()] + if dropout > 0: + mlp += [nn.Dropout(dropout)] + in_features = hidden_dim + + self.mlp = nn.Sequential(*mlp) + + def forward(self, x): + return self.mlp(x) + + +@HEAD_REGISTRY.register() +def mlp(**kwargs): + return MLP(**kwargs) diff --git a/Dassl.pytorch/dassl/modeling/network/__init__.py b/Dassl.pytorch/dassl/modeling/network/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a6fcc638d4b1c8bf4054611933643db375635c5b --- /dev/null +++ 
b/Dassl.pytorch/dassl/modeling/network/__init__.py @@ -0,0 +1,5 @@ +from .build import build_network, NETWORK_REGISTRY # isort:skip + +from .ddaig_fcn import ( + fcn_3x32_gctx, fcn_3x64_gctx, fcn_3x32_gctx_stn, fcn_3x64_gctx_stn +) diff --git a/Dassl.pytorch/dassl/modeling/network/build.py b/Dassl.pytorch/dassl/modeling/network/build.py new file mode 100644 index 0000000000000000000000000000000000000000..e615314fb6406a5c01c9b0ea61dee32bca4f8e2d --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/network/build.py @@ -0,0 +1,11 @@ +from dassl.utils import Registry, check_availability + +NETWORK_REGISTRY = Registry("NETWORK") + + +def build_network(name, verbose=True, **kwargs): + avai_models = NETWORK_REGISTRY.registered_names() + check_availability(name, avai_models) + if verbose: + print("Network: {}".format(name)) + return NETWORK_REGISTRY.get(name)(**kwargs) diff --git a/Dassl.pytorch/dassl/modeling/network/ddaig_fcn.py b/Dassl.pytorch/dassl/modeling/network/ddaig_fcn.py new file mode 100644 index 0000000000000000000000000000000000000000..17e3bdd25b74032ad4935c3747403a349999389d --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/network/ddaig_fcn.py @@ -0,0 +1,329 @@ +""" +Credit to: https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix +""" +import functools +import torch +import torch.nn as nn +from torch.nn import functional as F + +from .build import NETWORK_REGISTRY + + +def init_network_weights(model, init_type="normal", gain=0.02): + + def _init_func(m): + classname = m.__class__.__name__ + if hasattr(m, "weight") and ( + classname.find("Conv") != -1 or classname.find("Linear") != -1 + ): + if init_type == "normal": + nn.init.normal_(m.weight.data, 0.0, gain) + elif init_type == "xavier": + nn.init.xavier_normal_(m.weight.data, gain=gain) + elif init_type == "kaiming": + nn.init.kaiming_normal_(m.weight.data, a=0, mode="fan_in") + elif init_type == "orthogonal": + nn.init.orthogonal_(m.weight.data, gain=gain) + else: + raise NotImplementedError( + "initialization method {} is not implemented". 
+ format(init_type) + ) + if hasattr(m, "bias") and m.bias is not None: + nn.init.constant_(m.bias.data, 0.0) + elif classname.find("BatchNorm2d") != -1: + nn.init.constant_(m.weight.data, 1.0) + nn.init.constant_(m.bias.data, 0.0) + elif classname.find("InstanceNorm2d") != -1: + if m.weight is not None and m.bias is not None: + nn.init.constant_(m.weight.data, 1.0) + nn.init.constant_(m.bias.data, 0.0) + + model.apply(_init_func) + + +def get_norm_layer(norm_type="instance"): + if norm_type == "batch": + norm_layer = functools.partial(nn.BatchNorm2d, affine=True) + elif norm_type == "instance": + norm_layer = functools.partial( + nn.InstanceNorm2d, affine=False, track_running_stats=False + ) + elif norm_type == "none": + norm_layer = None + else: + raise NotImplementedError( + "normalization layer [%s] is not found" % norm_type + ) + return norm_layer + + +class ResnetBlock(nn.Module): + + def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias): + super().__init__() + self.conv_block = self.build_conv_block( + dim, padding_type, norm_layer, use_dropout, use_bias + ) + + def build_conv_block( + self, dim, padding_type, norm_layer, use_dropout, use_bias + ): + conv_block = [] + p = 0 + if padding_type == "reflect": + conv_block += [nn.ReflectionPad2d(1)] + elif padding_type == "replicate": + conv_block += [nn.ReplicationPad2d(1)] + elif padding_type == "zero": + p = 1 + else: + raise NotImplementedError( + "padding [%s] is not implemented" % padding_type + ) + + conv_block += [ + nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), + norm_layer(dim), + nn.ReLU(True), + ] + if use_dropout: + conv_block += [nn.Dropout(0.5)] + + p = 0 + if padding_type == "reflect": + conv_block += [nn.ReflectionPad2d(1)] + elif padding_type == "replicate": + conv_block += [nn.ReplicationPad2d(1)] + elif padding_type == "zero": + p = 1 + else: + raise NotImplementedError( + "padding [%s] is not implemented" % padding_type + ) + conv_block += [ + nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), + norm_layer(dim), + ] + + return nn.Sequential(*conv_block) + + def forward(self, x): + return x + self.conv_block(x) + + +class LocNet(nn.Module): + """Localization network.""" + + def __init__( + self, + input_nc, + nc=32, + n_blocks=3, + use_dropout=False, + padding_type="zero", + image_size=32, + ): + super().__init__() + + backbone = [] + backbone += [ + nn.Conv2d( + input_nc, nc, kernel_size=3, stride=2, padding=1, bias=False + ) + ] + backbone += [nn.BatchNorm2d(nc)] + backbone += [nn.ReLU(True)] + for _ in range(n_blocks): + backbone += [ + ResnetBlock( + nc, + padding_type=padding_type, + norm_layer=nn.BatchNorm2d, + use_dropout=use_dropout, + use_bias=False, + ) + ] + backbone += [nn.MaxPool2d(2, stride=2)] + self.backbone = nn.Sequential(*backbone) + reduced_imsize = int(image_size * 0.5**(n_blocks + 1)) + self.fc_loc = nn.Linear(nc * reduced_imsize**2, 2 * 2) + + def forward(self, x): + x = self.backbone(x) + x = x.view(x.size(0), -1) + x = self.fc_loc(x) + x = torch.tanh(x) + x = x.view(-1, 2, 2) + theta = x.data.new_zeros(x.size(0), 2, 3) + theta[:, :, :2] = x + return theta + + +class FCN(nn.Module): + """Fully convolutional network.""" + + def __init__( + self, + input_nc, + output_nc, + nc=32, + n_blocks=3, + norm_layer=nn.BatchNorm2d, + use_dropout=False, + padding_type="reflect", + gctx=True, + stn=False, + image_size=32, + ): + super().__init__() + + backbone = [] + + p = 0 + if padding_type == "reflect": + backbone += [nn.ReflectionPad2d(1)] + elif padding_type 
== "replicate": + backbone += [nn.ReplicationPad2d(1)] + elif padding_type == "zero": + p = 1 + else: + raise NotImplementedError + backbone += [ + nn.Conv2d( + input_nc, nc, kernel_size=3, stride=1, padding=p, bias=False + ) + ] + backbone += [norm_layer(nc)] + backbone += [nn.ReLU(True)] + + for _ in range(n_blocks): + backbone += [ + ResnetBlock( + nc, + padding_type=padding_type, + norm_layer=norm_layer, + use_dropout=use_dropout, + use_bias=False, + ) + ] + self.backbone = nn.Sequential(*backbone) + + # global context fusion layer + self.gctx_fusion = None + if gctx: + self.gctx_fusion = nn.Sequential( + nn.Conv2d( + 2 * nc, nc, kernel_size=1, stride=1, padding=0, bias=False + ), + norm_layer(nc), + nn.ReLU(True), + ) + + self.regress = nn.Sequential( + nn.Conv2d( + nc, output_nc, kernel_size=1, stride=1, padding=0, bias=True + ), + nn.Tanh(), + ) + + self.locnet = None + if stn: + self.locnet = LocNet( + input_nc, nc=nc, n_blocks=n_blocks, image_size=image_size + ) + + def init_loc_layer(self): + """Initialize the weights/bias with identity transformation.""" + if self.locnet is not None: + self.locnet.fc_loc.weight.data.zero_() + self.locnet.fc_loc.bias.data.copy_( + torch.tensor([1, 0, 0, 1], dtype=torch.float) + ) + + def stn(self, x): + """Spatial transformer network.""" + theta = self.locnet(x) + grid = F.affine_grid(theta, x.size()) + return F.grid_sample(x, grid), theta + + def forward(self, x, lmda=1.0, return_p=False, return_stn_output=False): + """ + Args: + x (torch.Tensor): input mini-batch. + lmda (float): multiplier for perturbation. + return_p (bool): return perturbation. + return_stn_output (bool): return the output of stn. + """ + theta = None + if self.locnet is not None: + x, theta = self.stn(x) + input = x + + x = self.backbone(x) + if self.gctx_fusion is not None: + c = F.adaptive_avg_pool2d(x, (1, 1)) + c = c.expand_as(x) + x = torch.cat([x, c], 1) + x = self.gctx_fusion(x) + + p = self.regress(x) + x_p = input + lmda*p + + if return_stn_output: + return x_p, p, input + + if return_p: + return x_p, p + + return x_p + + +@NETWORK_REGISTRY.register() +def fcn_3x32_gctx(**kwargs): + norm_layer = get_norm_layer(norm_type="instance") + net = FCN(3, 3, nc=32, n_blocks=3, norm_layer=norm_layer) + init_network_weights(net, init_type="normal", gain=0.02) + return net + + +@NETWORK_REGISTRY.register() +def fcn_3x64_gctx(**kwargs): + norm_layer = get_norm_layer(norm_type="instance") + net = FCN(3, 3, nc=64, n_blocks=3, norm_layer=norm_layer) + init_network_weights(net, init_type="normal", gain=0.02) + return net + + +@NETWORK_REGISTRY.register() +def fcn_3x32_gctx_stn(image_size=32, **kwargs): + norm_layer = get_norm_layer(norm_type="instance") + net = FCN( + 3, + 3, + nc=32, + n_blocks=3, + norm_layer=norm_layer, + stn=True, + image_size=image_size + ) + init_network_weights(net, init_type="normal", gain=0.02) + net.init_loc_layer() + return net + + +@NETWORK_REGISTRY.register() +def fcn_3x64_gctx_stn(image_size=224, **kwargs): + norm_layer = get_norm_layer(norm_type="instance") + net = FCN( + 3, + 3, + nc=64, + n_blocks=3, + norm_layer=norm_layer, + stn=True, + image_size=image_size + ) + init_network_weights(net, init_type="normal", gain=0.02) + net.init_loc_layer() + return net diff --git a/Dassl.pytorch/dassl/modeling/ops/__init__.py b/Dassl.pytorch/dassl/modeling/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..44d064003b06395640d2c7be020f0dfc4b9aa5c9 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/__init__.py @@ -0,0 +1,18 @@ 
+from .mmd import MaximumMeanDiscrepancy +from .conv import * +from .dsbn import DSBN1d, DSBN2d +from .mixup import mixup +from .efdmix import ( + EFDMix, random_efdmix, activate_efdmix, run_with_efdmix, deactivate_efdmix, + crossdomain_efdmix, run_without_efdmix +) +from .mixstyle import ( + MixStyle, random_mixstyle, activate_mixstyle, run_with_mixstyle, + deactivate_mixstyle, crossdomain_mixstyle, run_without_mixstyle +) +from .attention import * +from .transnorm import TransNorm1d, TransNorm2d +from .sequential2 import Sequential2 +from .reverse_grad import ReverseGrad +from .cross_entropy import cross_entropy +from .optimal_transport import SinkhornDivergence, MinibatchEnergyDistance diff --git a/Dassl.pytorch/dassl/modeling/ops/attention.py b/Dassl.pytorch/dassl/modeling/ops/attention.py new file mode 100644 index 0000000000000000000000000000000000000000..16ddcdab4d551e65fd588b87638efbed89b4c0e6 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/attention.py @@ -0,0 +1,31 @@ +import torch.nn as nn +from torch.nn import functional as F + +__all__ = ["Attention"] + + +class Attention(nn.Module): + """Attention from `"Dynamic Domain Generalization" `_. + """ + + def __init__( + self, + in_channels: int, + out_features: int, + squeeze=None, + bias: bool = True + ): + super(Attention, self).__init__() + self.squeeze = squeeze if squeeze else in_channels // 16 + assert self.squeeze > 0 + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc1 = nn.Linear(in_channels, self.squeeze, bias=bias) + self.fc2 = nn.Linear(self.squeeze, out_features, bias=bias) + self.sf = nn.Softmax(dim=-1) + + def forward(self, x): + x = self.avg_pool(x).view(x.shape[:-2]) + x = self.fc1(x) + x = F.relu(x, inplace=True) + x = self.fc2(x) + return self.sf(x) diff --git a/Dassl.pytorch/dassl/modeling/ops/conv.py b/Dassl.pytorch/dassl/modeling/ops/conv.py new file mode 100644 index 0000000000000000000000000000000000000000..fcee716fe1caa5a01cc03eb6f33230c01fd700fb --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/conv.py @@ -0,0 +1,95 @@ +import torch.nn as nn + +from .attention import Attention + +__all__ = ["Conv2dDynamic"] + + +class Conv2dDynamic(nn.Module): + """Conv2dDynamic from `"Dynamic Domain Generalization" `_. + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int, + padding: int, + bias: bool = True, + squeeze: int = None, + attention_in_channels: int = None + ) -> None: + super(Conv2dDynamic, self).__init__() + + if kernel_size // 2 != padding: + # Only when this condition is met, we can ensure that different + # kernel_size can obtain feature maps of consistent size. + # Let I, K, S, P, O: O = (I + 2P - K) // S + 1, if P = K // 2, then O = (I - K % 2) // S + 1 + # This means that the output of two different Ks with the same parity can be made the same by adjusting P. + raise ValueError("`padding` must be equal to `kernel_size // 2`.") + if kernel_size % 2 == 0: + raise ValueError( + "Kernel_size must be odd now because the templates we used are odd (kernel_size=1)." 
+ ) + + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias=bias + ) + self.kernel_templates = nn.ModuleDict() + self.kernel_templates["conv_nn"] = nn.Conv2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=min(in_channels, out_channels), + bias=bias + ) + self.kernel_templates["conv_11"] = nn.Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + padding=0, + bias=bias + ) + self.kernel_templates["conv_n1"] = nn.Conv2d( + in_channels, + out_channels, + kernel_size=(kernel_size, 1), + stride=stride, + padding=(padding, 0), + bias=bias + ) + self.kernel_templates["conv_1n"] = nn.Conv2d( + in_channels, + out_channels, + kernel_size=(1, kernel_size), + stride=stride, + padding=(0, padding), + bias=bias + ) + self.attention = Attention( + attention_in_channels if attention_in_channels else in_channels, + 4, + squeeze, + bias=bias + ) + + def forward(self, x, attention_x=None): + attention_x = x if attention_x is None else attention_x + y = self.attention(attention_x) + + out = self.conv(x) + + for i, template in enumerate(self.kernel_templates): + out += self.kernel_templates[template](x) * y[:, + i].view(-1, 1, 1, 1) + + return out diff --git a/Dassl.pytorch/dassl/modeling/ops/cross_entropy.py b/Dassl.pytorch/dassl/modeling/ops/cross_entropy.py new file mode 100644 index 0000000000000000000000000000000000000000..21723b026d6f846e96b49c31943c52a21677f277 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/cross_entropy.py @@ -0,0 +1,30 @@ +import torch +from torch.nn import functional as F + + +def cross_entropy(input, target, label_smooth=0, reduction="mean"): + """Cross entropy loss. + + Args: + input (torch.Tensor): logit matrix with shape of (batch, num_classes). + target (torch.LongTensor): int label matrix. + label_smooth (float, optional): label smoothing hyper-parameter. + Default is 0. + reduction (str, optional): how the losses for a mini-batch + will be aggregated. Default is 'mean'. + """ + num_classes = input.shape[1] + log_prob = F.log_softmax(input, dim=1) + zeros = torch.zeros(log_prob.size()) + target = zeros.scatter_(1, target.unsqueeze(1).data.cpu(), 1) + target = target.type_as(input) + target = (1-label_smooth) * target + label_smooth/num_classes + loss = (-target * log_prob).sum(1) + if reduction == "mean": + return loss.mean() + elif reduction == "sum": + return loss.sum() + elif reduction == "none": + return loss + else: + raise ValueError diff --git a/Dassl.pytorch/dassl/modeling/ops/dsbn.py b/Dassl.pytorch/dassl/modeling/ops/dsbn.py new file mode 100644 index 0000000000000000000000000000000000000000..e3ee35509d16a3a3d4ee6d5d963b12aab62eb1e7 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/dsbn.py @@ -0,0 +1,45 @@ +import torch.nn as nn + + +class _DSBN(nn.Module): + """Domain Specific Batch Normalization. + + Args: + num_features (int): number of features. + n_domain (int): number of domains. + bn_type (str): type of bn. Choices are ['1d', '2d']. 
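        The sketch below shows the intended call pattern; the feature size,
        domain count, and tensor shape are illustrative only, not from the source.

        Examples::
            >>> dsbn = DSBN2d(num_features=64, n_domain=2)
            >>> dsbn.select_bn(domain_idx=1)  # route inputs through the BN of domain 1
            >>> out = dsbn(torch.rand(8, 64, 32, 32))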
+ """ + + def __init__(self, num_features, n_domain, bn_type): + super().__init__() + if bn_type == "1d": + BN = nn.BatchNorm1d + elif bn_type == "2d": + BN = nn.BatchNorm2d + else: + raise ValueError + + self.bn = nn.ModuleList(BN(num_features) for _ in range(n_domain)) + + self.valid_domain_idxs = list(range(n_domain)) + self.n_domain = n_domain + self.domain_idx = 0 + + def select_bn(self, domain_idx=0): + assert domain_idx in self.valid_domain_idxs + self.domain_idx = domain_idx + + def forward(self, x): + return self.bn[self.domain_idx](x) + + +class DSBN1d(_DSBN): + + def __init__(self, num_features, n_domain): + super().__init__(num_features, n_domain, "1d") + + +class DSBN2d(_DSBN): + + def __init__(self, num_features, n_domain): + super().__init__(num_features, n_domain, "2d") diff --git a/Dassl.pytorch/dassl/modeling/ops/efdmix.py b/Dassl.pytorch/dassl/modeling/ops/efdmix.py new file mode 100644 index 0000000000000000000000000000000000000000..af58815af3619c3b3b8a19f45ec186ec5d49787b --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/efdmix.py @@ -0,0 +1,118 @@ +import random +from contextlib import contextmanager +import torch +import torch.nn as nn + + +def deactivate_efdmix(m): + if type(m) == EFDMix: + m.set_activation_status(False) + + +def activate_efdmix(m): + if type(m) == EFDMix: + m.set_activation_status(True) + + +def random_efdmix(m): + if type(m) == EFDMix: + m.update_mix_method("random") + + +def crossdomain_efdmix(m): + if type(m) == EFDMix: + m.update_mix_method("crossdomain") + + +@contextmanager +def run_without_efdmix(model): + # Assume MixStyle was initially activated + try: + model.apply(deactivate_efdmix) + yield + finally: + model.apply(activate_efdmix) + + +@contextmanager +def run_with_efdmix(model, mix=None): + # Assume MixStyle was initially deactivated + if mix == "random": + model.apply(random_efdmix) + + elif mix == "crossdomain": + model.apply(crossdomain_efdmix) + + try: + model.apply(activate_efdmix) + yield + finally: + model.apply(deactivate_efdmix) + + +class EFDMix(nn.Module): + """EFDMix. + + Reference: + Zhang et al. Exact Feature Distribution Matching for Arbitrary Style Transfer and Domain Generalization. CVPR 2022. + """ + + def __init__(self, p=0.5, alpha=0.1, eps=1e-6, mix="random"): + """ + Args: + p (float): probability of using MixStyle. + alpha (float): parameter of the Beta distribution. + eps (float): scaling parameter to avoid numerical issues. + mix (str): how to mix. 
+ """ + super().__init__() + self.p = p + self.beta = torch.distributions.Beta(alpha, alpha) + self.eps = eps + self.alpha = alpha + self.mix = mix + self._activated = True + + def __repr__(self): + return ( + f"MixStyle(p={self.p}, alpha={self.alpha}, eps={self.eps}, mix={self.mix})" + ) + + def set_activation_status(self, status=True): + self._activated = status + + def update_mix_method(self, mix="random"): + self.mix = mix + + def forward(self, x): + if not self.training or not self._activated: + return x + + if random.random() > self.p: + return x + + B, C, W, H = x.size(0), x.size(1), x.size(2), x.size(3) + x_view = x.view(B, C, -1) + value_x, index_x = torch.sort(x_view) # sort inputs + lmda = self.beta.sample((B, 1, 1)) + lmda = lmda.to(x.device) + + if self.mix == "random": + # random shuffle + perm = torch.randperm(B) + + elif self.mix == "crossdomain": + # split into two halves and swap the order + perm = torch.arange(B - 1, -1, -1) # inverse index + perm_b, perm_a = perm.chunk(2) + perm_b = perm_b[torch.randperm(perm_b.shape[0])] + perm_a = perm_a[torch.randperm(perm_a.shape[0])] + perm = torch.cat([perm_b, perm_a], 0) + + else: + raise NotImplementedError + + inverse_index = index_x.argsort(-1) + x_view_copy = value_x[perm].gather(-1, inverse_index) + new_x = x_view + (x_view_copy - x_view.detach()) * (1-lmda) + return new_x.view(B, C, W, H) diff --git a/Dassl.pytorch/dassl/modeling/ops/mixstyle.py b/Dassl.pytorch/dassl/modeling/ops/mixstyle.py new file mode 100644 index 0000000000000000000000000000000000000000..34f47a89f8eaca215e34914dfacc74d904ad8cca --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/mixstyle.py @@ -0,0 +1,124 @@ +import random +from contextlib import contextmanager +import torch +import torch.nn as nn + + +def deactivate_mixstyle(m): + if type(m) == MixStyle: + m.set_activation_status(False) + + +def activate_mixstyle(m): + if type(m) == MixStyle: + m.set_activation_status(True) + + +def random_mixstyle(m): + if type(m) == MixStyle: + m.update_mix_method("random") + + +def crossdomain_mixstyle(m): + if type(m) == MixStyle: + m.update_mix_method("crossdomain") + + +@contextmanager +def run_without_mixstyle(model): + # Assume MixStyle was initially activated + try: + model.apply(deactivate_mixstyle) + yield + finally: + model.apply(activate_mixstyle) + + +@contextmanager +def run_with_mixstyle(model, mix=None): + # Assume MixStyle was initially deactivated + if mix == "random": + model.apply(random_mixstyle) + + elif mix == "crossdomain": + model.apply(crossdomain_mixstyle) + + try: + model.apply(activate_mixstyle) + yield + finally: + model.apply(deactivate_mixstyle) + + +class MixStyle(nn.Module): + """MixStyle. + + Reference: + Zhou et al. Domain Generalization with MixStyle. ICLR 2021. + """ + + def __init__(self, p=0.5, alpha=0.1, eps=1e-6, mix="random"): + """ + Args: + p (float): probability of using MixStyle. + alpha (float): parameter of the Beta distribution. + eps (float): scaling parameter to avoid numerical issues. + mix (str): how to mix. 
+ """ + super().__init__() + self.p = p + self.beta = torch.distributions.Beta(alpha, alpha) + self.eps = eps + self.alpha = alpha + self.mix = mix + self._activated = True + + def __repr__(self): + return ( + f"MixStyle(p={self.p}, alpha={self.alpha}, eps={self.eps}, mix={self.mix})" + ) + + def set_activation_status(self, status=True): + self._activated = status + + def update_mix_method(self, mix="random"): + self.mix = mix + + def forward(self, x): + if not self.training or not self._activated: + return x + + if random.random() > self.p: + return x + + B = x.size(0) + + mu = x.mean(dim=[2, 3], keepdim=True) + var = x.var(dim=[2, 3], keepdim=True) + sig = (var + self.eps).sqrt() + mu, sig = mu.detach(), sig.detach() + x_normed = (x-mu) / sig + + lmda = self.beta.sample((B, 1, 1, 1)) + lmda = lmda.to(x.device) + + if self.mix == "random": + # random shuffle + perm = torch.randperm(B) + + elif self.mix == "crossdomain": + # split into two halves and swap the order + perm = torch.arange(B - 1, -1, -1) # inverse index + perm_b, perm_a = perm.chunk(2) + perm_b = perm_b[torch.randperm(perm_b.shape[0])] + perm_a = perm_a[torch.randperm(perm_a.shape[0])] + perm = torch.cat([perm_b, perm_a], 0) + + else: + raise NotImplementedError + + mu2, sig2 = mu[perm], sig[perm] + mu_mix = mu*lmda + mu2 * (1-lmda) + sig_mix = sig*lmda + sig2 * (1-lmda) + + return x_normed*sig_mix + mu_mix diff --git a/Dassl.pytorch/dassl/modeling/ops/mixup.py b/Dassl.pytorch/dassl/modeling/ops/mixup.py new file mode 100644 index 0000000000000000000000000000000000000000..5844074a6aaa72bf9ca8be7f3ec5f7a646b144de --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/mixup.py @@ -0,0 +1,23 @@ +import torch + + +def mixup(x1, x2, y1, y2, beta, preserve_order=False): + """Mixup. + + Args: + x1 (torch.Tensor): data with shape of (b, c, h, w). + x2 (torch.Tensor): data with shape of (b, c, h, w). + y1 (torch.Tensor): label with shape of (b, n). + y2 (torch.Tensor): label with shape of (b, n). + beta (float): hyper-parameter for Beta sampling. + preserve_order (bool): apply lmda=max(lmda, 1-lmda). + Default is False. 
+ """ + lmda = torch.distributions.Beta(beta, beta).sample([x1.shape[0], 1, 1, 1]) + if preserve_order: + lmda = torch.max(lmda, 1 - lmda) + lmda = lmda.to(x1.device) + xmix = x1*lmda + x2 * (1-lmda) + lmda = lmda[:, :, 0, 0] + ymix = y1*lmda + y2 * (1-lmda) + return xmix, ymix diff --git a/Dassl.pytorch/dassl/modeling/ops/mmd.py b/Dassl.pytorch/dassl/modeling/ops/mmd.py new file mode 100644 index 0000000000000000000000000000000000000000..a23fa575cc5722a1079176c378dc96c10029510e --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/mmd.py @@ -0,0 +1,91 @@ +import torch +import torch.nn as nn +from torch.nn import functional as F + + +class MaximumMeanDiscrepancy(nn.Module): + + def __init__(self, kernel_type="rbf", normalize=False): + super().__init__() + self.kernel_type = kernel_type + self.normalize = normalize + + def forward(self, x, y): + # x, y: two batches of data with shape (batch, dim) + # MMD^2(x, y) = k(x, x') - 2k(x, y) + k(y, y') + if self.normalize: + x = F.normalize(x, dim=1) + y = F.normalize(y, dim=1) + if self.kernel_type == "linear": + return self.linear_mmd(x, y) + elif self.kernel_type == "poly": + return self.poly_mmd(x, y) + elif self.kernel_type == "rbf": + return self.rbf_mmd(x, y) + else: + raise NotImplementedError + + def linear_mmd(self, x, y): + # k(x, y) = x^T y + k_xx = self.remove_self_distance(torch.mm(x, x.t())) + k_yy = self.remove_self_distance(torch.mm(y, y.t())) + k_xy = torch.mm(x, y.t()) + return k_xx.mean() + k_yy.mean() - 2 * k_xy.mean() + + def poly_mmd(self, x, y, alpha=1.0, c=2.0, d=2): + # k(x, y) = (alpha * x^T y + c)^d + k_xx = self.remove_self_distance(torch.mm(x, x.t())) + k_xx = (alpha*k_xx + c).pow(d) + k_yy = self.remove_self_distance(torch.mm(y, y.t())) + k_yy = (alpha*k_yy + c).pow(d) + k_xy = torch.mm(x, y.t()) + k_xy = (alpha*k_xy + c).pow(d) + return k_xx.mean() + k_yy.mean() - 2 * k_xy.mean() + + def rbf_mmd(self, x, y): + # k_xx + d_xx = self.euclidean_squared_distance(x, x) + d_xx = self.remove_self_distance(d_xx) + k_xx = self.rbf_kernel_mixture(d_xx) + # k_yy + d_yy = self.euclidean_squared_distance(y, y) + d_yy = self.remove_self_distance(d_yy) + k_yy = self.rbf_kernel_mixture(d_yy) + # k_xy + d_xy = self.euclidean_squared_distance(x, y) + k_xy = self.rbf_kernel_mixture(d_xy) + return k_xx.mean() + k_yy.mean() - 2 * k_xy.mean() + + @staticmethod + def rbf_kernel_mixture(exponent, sigmas=[1, 5, 10]): + K = 0 + for sigma in sigmas: + gamma = 1.0 / (2.0 * sigma**2) + K += torch.exp(-gamma * exponent) + return K + + @staticmethod + def remove_self_distance(distmat): + tmp_list = [] + for i, row in enumerate(distmat): + row1 = torch.cat([row[:i], row[i + 1:]]) + tmp_list.append(row1) + return torch.stack(tmp_list) + + @staticmethod + def euclidean_squared_distance(x, y): + m, n = x.size(0), y.size(0) + distmat = ( + torch.pow(x, 2).sum(dim=1, keepdim=True).expand(m, n) + + torch.pow(y, 2).sum(dim=1, keepdim=True).expand(n, m).t() + ) + # distmat.addmm_(1, -2, x, y.t()) + distmat.addmm_(x, y.t(), beta=1, alpha=-2) + return distmat + + +if __name__ == "__main__": + mmd = MaximumMeanDiscrepancy(kernel_type="rbf") + input1, input2 = torch.rand(3, 100), torch.rand(3, 100) + d = mmd(input1, input2) + print(d.item()) diff --git a/Dassl.pytorch/dassl/modeling/ops/optimal_transport.py b/Dassl.pytorch/dassl/modeling/ops/optimal_transport.py new file mode 100644 index 0000000000000000000000000000000000000000..128ea96b33e877bd9e992414ac0e744f0d311438 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/optimal_transport.py @@ -0,0 +1,147 @@ 
+import torch +import torch.nn as nn +from torch.nn import functional as F + + +class OptimalTransport(nn.Module): + + @staticmethod + def distance(batch1, batch2, dist_metric="cosine"): + if dist_metric == "cosine": + batch1 = F.normalize(batch1, p=2, dim=1) + batch2 = F.normalize(batch2, p=2, dim=1) + dist_mat = 1 - torch.mm(batch1, batch2.t()) + elif dist_metric == "euclidean": + m, n = batch1.size(0), batch2.size(0) + dist_mat = ( + torch.pow(batch1, 2).sum(dim=1, keepdim=True).expand(m, n) + + torch.pow(batch2, 2).sum(dim=1, keepdim=True).expand(n, m).t() + ) + dist_mat.addmm_( + 1, -2, batch1, batch2.t() + ) # squared euclidean distance + elif dist_metric == "fast_euclidean": + batch1 = batch1.unsqueeze(-2) + batch2 = batch2.unsqueeze(-3) + dist_mat = torch.sum((torch.abs(batch1 - batch2))**2, -1) + else: + raise ValueError( + "Unknown cost function: {}. Expected to " + "be one of [cosine | euclidean]".format(dist_metric) + ) + return dist_mat + + +class SinkhornDivergence(OptimalTransport): + thre = 1e-3 + + def __init__( + self, + dist_metric="cosine", + eps=0.01, + max_iter=5, + bp_to_sinkhorn=False + ): + super().__init__() + self.dist_metric = dist_metric + self.eps = eps + self.max_iter = max_iter + self.bp_to_sinkhorn = bp_to_sinkhorn + + def forward(self, x, y): + # x, y: two batches of data with shape (batch, dim) + W_xy = self.transport_cost(x, y) + W_xx = self.transport_cost(x, x) + W_yy = self.transport_cost(y, y) + return 2*W_xy - W_xx - W_yy + + def transport_cost(self, x, y, return_pi=False): + C = self.distance(x, y, dist_metric=self.dist_metric) + pi = self.sinkhorn_iterate(C, self.eps, self.max_iter, self.thre) + if not self.bp_to_sinkhorn: + pi = pi.detach() + cost = torch.sum(pi * C) + if return_pi: + return cost, pi + return cost + + @staticmethod + def sinkhorn_iterate(C, eps, max_iter, thre): + nx, ny = C.shape + mu = torch.ones(nx, dtype=C.dtype, device=C.device) * (1.0/nx) + nu = torch.ones(ny, dtype=C.dtype, device=C.device) * (1.0/ny) + u = torch.zeros_like(mu) + v = torch.zeros_like(nu) + + def M(_C, _u, _v): + """Modified cost for logarithmic updates. 
+ Eq: M_{ij} = (-c_{ij} + u_i + v_j) / epsilon + """ + return (-_C + _u.unsqueeze(-1) + _v.unsqueeze(-2)) / eps + + real_iter = 0 # check if algorithm terminates before max_iter + # Sinkhorn iterations + for i in range(max_iter): + u0 = u + u = eps * ( + torch.log(mu + 1e-8) - torch.logsumexp(M(C, u, v), dim=1) + ) + u + v = ( + eps * ( + torch.log(nu + 1e-8) - + torch.logsumexp(M(C, u, v).permute(1, 0), dim=1) + ) + v + ) + err = (u - u0).abs().sum() + real_iter += 1 + if err.item() < thre: + break + # Transport plan pi = diag(a)*K*diag(b) + return torch.exp(M(C, u, v)) + + +class MinibatchEnergyDistance(SinkhornDivergence): + + def __init__( + self, + dist_metric="cosine", + eps=0.01, + max_iter=5, + bp_to_sinkhorn=False + ): + super().__init__( + dist_metric=dist_metric, + eps=eps, + max_iter=max_iter, + bp_to_sinkhorn=bp_to_sinkhorn, + ) + + def forward(self, x, y): + x1, x2 = torch.split(x, x.size(0) // 2, dim=0) + y1, y2 = torch.split(y, y.size(0) // 2, dim=0) + cost = 0 + cost += self.transport_cost(x1, y1) + cost += self.transport_cost(x1, y2) + cost += self.transport_cost(x2, y1) + cost += self.transport_cost(x2, y2) + cost -= 2 * self.transport_cost(x1, x2) + cost -= 2 * self.transport_cost(y1, y2) + return cost + + +if __name__ == "__main__": + # example: https://dfdazac.github.io/sinkhorn.html + import numpy as np + + n_points = 5 + a = np.array([[i, 0] for i in range(n_points)]) + b = np.array([[i, 1] for i in range(n_points)]) + x = torch.tensor(a, dtype=torch.float) + y = torch.tensor(b, dtype=torch.float) + sinkhorn = SinkhornDivergence( + dist_metric="euclidean", eps=0.01, max_iter=5 + ) + dist, pi = sinkhorn.transport_cost(x, y, True) + import pdb + + pdb.set_trace() diff --git a/Dassl.pytorch/dassl/modeling/ops/reverse_grad.py b/Dassl.pytorch/dassl/modeling/ops/reverse_grad.py new file mode 100644 index 0000000000000000000000000000000000000000..34bab9db2ab1eafe369aae02ba5856bd8fa15bf8 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/reverse_grad.py @@ -0,0 +1,34 @@ +import torch.nn as nn +from torch.autograd import Function + + +class _ReverseGrad(Function): + + @staticmethod + def forward(ctx, input, grad_scaling): + ctx.grad_scaling = grad_scaling + return input.view_as(input) + + @staticmethod + def backward(ctx, grad_output): + grad_scaling = ctx.grad_scaling + return -grad_scaling * grad_output, None + + +reverse_grad = _ReverseGrad.apply + + +class ReverseGrad(nn.Module): + """Gradient reversal layer. + + It acts as an identity layer in the forward, + but reverses the sign of the gradient in + the backward. + """ + + def forward(self, x, grad_scaling=1.0): + assert (grad_scaling >= + 0), "grad_scaling must be non-negative, " "but got {}".format( + grad_scaling + ) + return reverse_grad(x, grad_scaling) diff --git a/Dassl.pytorch/dassl/modeling/ops/sequential2.py b/Dassl.pytorch/dassl/modeling/ops/sequential2.py new file mode 100644 index 0000000000000000000000000000000000000000..47a83834f3188db78e336d809d0f94e7546792ca --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/sequential2.py @@ -0,0 +1,15 @@ +import torch.nn as nn + + +class Sequential2(nn.Sequential): + """An alternative sequential container to nn.Sequential, + which accepts an arbitrary number of input arguments. 
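    A small sketch of the intended use (``TwoInputBlock`` is a hypothetical
    module whose forward takes two tensors and returns a single tensor)::
        >>> net = Sequential2(TwoInputBlock(), nn.ReLU())
        >>> out = net(x, y)  # (x, y) go to the first module; its output feeds the rest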
+ """ + + def forward(self, *inputs): + for module in self._modules.values(): + if isinstance(inputs, tuple): + inputs = module(*inputs) + else: + inputs = module(inputs) + return inputs diff --git a/Dassl.pytorch/dassl/modeling/ops/transnorm.py b/Dassl.pytorch/dassl/modeling/ops/transnorm.py new file mode 100644 index 0000000000000000000000000000000000000000..453db773d77d9661f52e32447662bff3d30e5c9a --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/transnorm.py @@ -0,0 +1,138 @@ +import torch +import torch.nn as nn + + +class _TransNorm(nn.Module): + """Transferable normalization. + + Reference: + - Wang et al. Transferable Normalization: Towards Improving + Transferability of Deep Neural Networks. NeurIPS 2019. + + Args: + num_features (int): number of features. + eps (float): epsilon. + momentum (float): value for updating running_mean and running_var. + adaptive_alpha (bool): apply domain adaptive alpha. + """ + + def __init__( + self, num_features, eps=1e-5, momentum=0.1, adaptive_alpha=True + ): + super().__init__() + self.num_features = num_features + self.eps = eps + self.momentum = momentum + self.adaptive_alpha = adaptive_alpha + + self.register_buffer("running_mean_s", torch.zeros(num_features)) + self.register_buffer("running_var_s", torch.ones(num_features)) + self.register_buffer("running_mean_t", torch.zeros(num_features)) + self.register_buffer("running_var_t", torch.ones(num_features)) + + self.weight = nn.Parameter(torch.ones(num_features)) + self.bias = nn.Parameter(torch.zeros(num_features)) + + def resnet_running_stats(self): + self.running_mean_s.zero_() + self.running_var_s.fill_(1) + self.running_mean_t.zero_() + self.running_var_t.fill_(1) + + def reset_parameters(self): + nn.init.ones_(self.weight) + nn.init.zeros_(self.bias) + + def _check_input(self, x): + raise NotImplementedError + + def _compute_alpha(self, mean_s, var_s, mean_t, var_t): + C = self.num_features + ratio_s = mean_s / (var_s + self.eps).sqrt() + ratio_t = mean_t / (var_t + self.eps).sqrt() + dist = (ratio_s - ratio_t).abs() + dist_inv = 1 / (1+dist) + return C * dist_inv / dist_inv.sum() + + def forward(self, input): + self._check_input(input) + C = self.num_features + if input.dim() == 2: + new_shape = (1, C) + elif input.dim() == 4: + new_shape = (1, C, 1, 1) + else: + raise ValueError + + weight = self.weight.view(*new_shape) + bias = self.bias.view(*new_shape) + + if not self.training: + mean_t = self.running_mean_t.view(*new_shape) + var_t = self.running_var_t.view(*new_shape) + output = (input-mean_t) / (var_t + self.eps).sqrt() + output = output*weight + bias + + if self.adaptive_alpha: + mean_s = self.running_mean_s.view(*new_shape) + var_s = self.running_var_s.view(*new_shape) + alpha = self._compute_alpha(mean_s, var_s, mean_t, var_t) + alpha = alpha.reshape(*new_shape) + output = (1 + alpha.detach()) * output + + return output + + input_s, input_t = torch.split(input, input.shape[0] // 2, dim=0) + + x_s = input_s.transpose(0, 1).reshape(C, -1) + mean_s = x_s.mean(1) + var_s = x_s.var(1) + self.running_mean_s.mul_(self.momentum) + self.running_mean_s.add_((1 - self.momentum) * mean_s.data) + self.running_var_s.mul_(self.momentum) + self.running_var_s.add_((1 - self.momentum) * var_s.data) + mean_s = mean_s.reshape(*new_shape) + var_s = var_s.reshape(*new_shape) + output_s = (input_s-mean_s) / (var_s + self.eps).sqrt() + output_s = output_s*weight + bias + + x_t = input_t.transpose(0, 1).reshape(C, -1) + mean_t = x_t.mean(1) + var_t = x_t.var(1) + 
self.running_mean_t.mul_(self.momentum) + self.running_mean_t.add_((1 - self.momentum) * mean_t.data) + self.running_var_t.mul_(self.momentum) + self.running_var_t.add_((1 - self.momentum) * var_t.data) + mean_t = mean_t.reshape(*new_shape) + var_t = var_t.reshape(*new_shape) + output_t = (input_t-mean_t) / (var_t + self.eps).sqrt() + output_t = output_t*weight + bias + + output = torch.cat([output_s, output_t], 0) + + if self.adaptive_alpha: + alpha = self._compute_alpha(mean_s, var_s, mean_t, var_t) + alpha = alpha.reshape(*new_shape) + output = (1 + alpha.detach()) * output + + return output + + +class TransNorm1d(_TransNorm): + + def _check_input(self, x): + if x.dim() != 2: + raise ValueError( + "Expected the input to be 2-D, " + "but got {}-D".format(x.dim()) + ) + + +class TransNorm2d(_TransNorm): + + def _check_input(self, x): + if x.dim() != 4: + raise ValueError( + "Expected the input to be 4-D, " + "but got {}-D".format(x.dim()) + ) diff --git a/Dassl.pytorch/dassl/modeling/ops/utils.py b/Dassl.pytorch/dassl/modeling/ops/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6bfcc898f62d6c642726b3a05cd3d586872ebc07 --- /dev/null +++ b/Dassl.pytorch/dassl/modeling/ops/utils.py @@ -0,0 +1,77 @@ +import numpy as np +import torch + + +def sharpen_prob(p, temperature=2): + """Sharpening probability with a temperature. + + Args: + p (torch.Tensor): probability matrix (batch_size, n_classes) + temperature (float): temperature. + """ + p = p.pow(temperature) + return p / p.sum(1, keepdim=True) + + +def reverse_index(data, label): + """Reverse order.""" + inv_idx = torch.arange(data.size(0) - 1, -1, -1).long() + return data[inv_idx], label[inv_idx] + + +def shuffle_index(data, label): + """Shuffle order.""" + rnd_idx = torch.randperm(data.shape[0]) + return data[rnd_idx], label[rnd_idx] + + +def create_onehot(label, num_classes): + """Create one-hot tensor. + + We suggest using nn.functional.one_hot. + + Args: + label (torch.Tensor): 1-D tensor. + num_classes (int): number of classes. + """ + onehot = torch.zeros(label.shape[0], num_classes) + onehot = onehot.scatter(1, label.unsqueeze(1).data.cpu(), 1) + onehot = onehot.to(label.device) + return onehot + + +def sigmoid_rampup(current, rampup_length): + """Exponential rampup. + + Args: + current (int): current step. + rampup_length (int): maximum step. + """ + assert rampup_length > 0 + current = np.clip(current, 0.0, rampup_length) + phase = 1.0 - current/rampup_length + return float(np.exp(-5.0 * phase * phase)) + + +def linear_rampup(current, rampup_length): + """Linear rampup. + + Args: + current (int): current step. + rampup_length (int): maximum step. + """ + assert rampup_length > 0 + ratio = np.clip(current / rampup_length, 0.0, 1.0) + return float(ratio) + + +def ema_model_update(model, ema_model, alpha): + """Exponential moving average of model parameters. + + Args: + model (nn.Module): model being trained. + ema_model (nn.Module): ema of the model. + alpha (float): ema decay rate. 
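    A usage sketch (``alpha`` is illustrative; ``ema_model`` is assumed to be a
    copy of ``model`` with the same architecture)::
        >>> ema_model = copy.deepcopy(model)
        >>> # ... after each optimization step ...
        >>> ema_model_update(model, ema_model, alpha=0.999)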
+ """ + for ema_param, param in zip(ema_model.parameters(), model.parameters()): + ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha) diff --git a/Dassl.pytorch/dassl/optim/__init__.py b/Dassl.pytorch/dassl/optim/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e7ef4c04212eb6f030baa075af8d7e367a229b82 --- /dev/null +++ b/Dassl.pytorch/dassl/optim/__init__.py @@ -0,0 +1,2 @@ +from .optimizer import build_optimizer +from .lr_scheduler import build_lr_scheduler diff --git a/Dassl.pytorch/dassl/optim/lr_scheduler.py b/Dassl.pytorch/dassl/optim/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..48d58853abee02b4f11bf20eab6d942bdf26c018 --- /dev/null +++ b/Dassl.pytorch/dassl/optim/lr_scheduler.py @@ -0,0 +1,152 @@ +""" +Modified from https://github.com/KaiyangZhou/deep-person-reid +""" +import torch +from torch.optim.lr_scheduler import _LRScheduler + +AVAI_SCHEDS = ["single_step", "multi_step", "cosine"] + + +class _BaseWarmupScheduler(_LRScheduler): + + def __init__( + self, + optimizer, + successor, + warmup_epoch, + last_epoch=-1, + verbose=False + ): + self.successor = successor + self.warmup_epoch = warmup_epoch + super().__init__(optimizer, last_epoch, verbose) + + def get_lr(self): + raise NotImplementedError + + def step(self, epoch=None): + if self.last_epoch >= self.warmup_epoch: + self.successor.step(epoch) + self._last_lr = self.successor.get_last_lr() + else: + super().step(epoch) + + +class ConstantWarmupScheduler(_BaseWarmupScheduler): + + def __init__( + self, + optimizer, + successor, + warmup_epoch, + cons_lr, + last_epoch=-1, + verbose=False + ): + self.cons_lr = cons_lr + super().__init__( + optimizer, successor, warmup_epoch, last_epoch, verbose + ) + + def get_lr(self): + if self.last_epoch >= self.warmup_epoch: + return self.successor.get_last_lr() + return [self.cons_lr for _ in self.base_lrs] + + +class LinearWarmupScheduler(_BaseWarmupScheduler): + + def __init__( + self, + optimizer, + successor, + warmup_epoch, + min_lr, + last_epoch=-1, + verbose=False + ): + self.min_lr = min_lr + super().__init__( + optimizer, successor, warmup_epoch, last_epoch, verbose + ) + + def get_lr(self): + if self.last_epoch >= self.warmup_epoch: + return self.successor.get_last_lr() + if self.last_epoch == 0: + return [self.min_lr for _ in self.base_lrs] + return [ + lr * self.last_epoch / self.warmup_epoch for lr in self.base_lrs + ] + + +def build_lr_scheduler(optimizer, optim_cfg): + """A function wrapper for building a learning rate scheduler. + + Args: + optimizer (Optimizer): an Optimizer. + optim_cfg (CfgNode): optimization config. 
+ """ + lr_scheduler = optim_cfg.LR_SCHEDULER + stepsize = optim_cfg.STEPSIZE + gamma = optim_cfg.GAMMA + max_epoch = optim_cfg.MAX_EPOCH + + if lr_scheduler not in AVAI_SCHEDS: + raise ValueError( + f"scheduler must be one of {AVAI_SCHEDS}, but got {lr_scheduler}" + ) + + if lr_scheduler == "single_step": + if isinstance(stepsize, (list, tuple)): + stepsize = stepsize[-1] + + if not isinstance(stepsize, int): + raise TypeError( + "For single_step lr_scheduler, stepsize must " + f"be an integer, but got {type(stepsize)}" + ) + + if stepsize <= 0: + stepsize = max_epoch + + scheduler = torch.optim.lr_scheduler.StepLR( + optimizer, step_size=stepsize, gamma=gamma + ) + + elif lr_scheduler == "multi_step": + if not isinstance(stepsize, (list, tuple)): + raise TypeError( + "For multi_step lr_scheduler, stepsize must " + f"be a list, but got {type(stepsize)}" + ) + + scheduler = torch.optim.lr_scheduler.MultiStepLR( + optimizer, milestones=stepsize, gamma=gamma + ) + + elif lr_scheduler == "cosine": + scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer, float(max_epoch) + ) + + if optim_cfg.WARMUP_EPOCH > 0: + if not optim_cfg.WARMUP_RECOUNT: + scheduler.last_epoch = optim_cfg.WARMUP_EPOCH + + if optim_cfg.WARMUP_TYPE == "constant": + scheduler = ConstantWarmupScheduler( + optimizer, scheduler, optim_cfg.WARMUP_EPOCH, + optim_cfg.WARMUP_CONS_LR + ) + + elif optim_cfg.WARMUP_TYPE == "linear": + scheduler = LinearWarmupScheduler( + optimizer, scheduler, optim_cfg.WARMUP_EPOCH, + optim_cfg.WARMUP_MIN_LR + ) + + else: + raise ValueError + + return scheduler diff --git a/Dassl.pytorch/dassl/optim/optimizer.py b/Dassl.pytorch/dassl/optim/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..5ebcd62291472faf70f8715e23b72d289f8c01bb --- /dev/null +++ b/Dassl.pytorch/dassl/optim/optimizer.py @@ -0,0 +1,142 @@ +""" +Modified from https://github.com/KaiyangZhou/deep-person-reid +""" +import warnings +import torch +import torch.nn as nn + +from .radam import RAdam + +AVAI_OPTIMS = ["adam", "amsgrad", "sgd", "rmsprop", "radam", "adamw"] + + +def build_optimizer(model, optim_cfg, param_groups=None): + """A function wrapper for building an optimizer. + + Args: + model (nn.Module or iterable): model. + optim_cfg (CfgNode): optimization config. + param_groups: If provided, directly optimize param_groups and abandon model + """ + optim = optim_cfg.NAME + lr = optim_cfg.LR + weight_decay = optim_cfg.WEIGHT_DECAY + momentum = optim_cfg.MOMENTUM + sgd_dampening = optim_cfg.SGD_DAMPNING + sgd_nesterov = optim_cfg.SGD_NESTEROV + rmsprop_alpha = optim_cfg.RMSPROP_ALPHA + adam_beta1 = optim_cfg.ADAM_BETA1 + adam_beta2 = optim_cfg.ADAM_BETA2 + staged_lr = optim_cfg.STAGED_LR + new_layers = optim_cfg.NEW_LAYERS + base_lr_mult = optim_cfg.BASE_LR_MULT + + if optim not in AVAI_OPTIMS: + raise ValueError( + f"optim must be one of {AVAI_OPTIMS}, but got {optim}" + ) + + if param_groups is not None and staged_lr: + warnings.warn( + "staged_lr will be ignored, if you need to use staged_lr, " + "please bind it with param_groups yourself." 
+ ) + + if param_groups is None: + if staged_lr: + if not isinstance(model, nn.Module): + raise TypeError( + "When staged_lr is True, model given to " + "build_optimizer() must be an instance of nn.Module" + ) + + if isinstance(model, nn.DataParallel): + model = model.module + + if isinstance(new_layers, str): + if new_layers is None: + warnings.warn("new_layers is empty (staged_lr is useless)") + new_layers = [new_layers] + + base_params = [] + base_layers = [] + new_params = [] + + for name, module in model.named_children(): + if name in new_layers: + new_params += [p for p in module.parameters()] + else: + base_params += [p for p in module.parameters()] + base_layers.append(name) + + param_groups = [ + { + "params": base_params, + "lr": lr * base_lr_mult + }, + { + "params": new_params + }, + ] + + else: + if isinstance(model, nn.Module): + param_groups = model.parameters() + else: + param_groups = model + + if optim == "adam": + optimizer = torch.optim.Adam( + param_groups, + lr=lr, + weight_decay=weight_decay, + betas=(adam_beta1, adam_beta2), + ) + + elif optim == "amsgrad": + optimizer = torch.optim.Adam( + param_groups, + lr=lr, + weight_decay=weight_decay, + betas=(adam_beta1, adam_beta2), + amsgrad=True, + ) + + elif optim == "sgd": + optimizer = torch.optim.SGD( + param_groups, + lr=lr, + momentum=momentum, + weight_decay=weight_decay, + dampening=sgd_dampening, + nesterov=sgd_nesterov, + ) + + elif optim == "rmsprop": + optimizer = torch.optim.RMSprop( + param_groups, + lr=lr, + momentum=momentum, + weight_decay=weight_decay, + alpha=rmsprop_alpha, + ) + + elif optim == "radam": + optimizer = RAdam( + param_groups, + lr=lr, + weight_decay=weight_decay, + betas=(adam_beta1, adam_beta2), + ) + + elif optim == "adamw": + optimizer = torch.optim.AdamW( + param_groups, + lr=lr, + weight_decay=weight_decay, + betas=(adam_beta1, adam_beta2), + ) + else: + raise NotImplementedError(f"Optimizer {optim} not implemented yet!") + + return optimizer diff --git a/Dassl.pytorch/dassl/optim/radam.py b/Dassl.pytorch/dassl/optim/radam.py new file mode 100644 index 0000000000000000000000000000000000000000..f4c1989f5438edae5573775a26ac723f128c0682 --- /dev/null +++ b/Dassl.pytorch/dassl/optim/radam.py @@ -0,0 +1,332 @@ +""" +Imported from: https://github.com/LiyuanLucasLiu/RAdam + +https://arxiv.org/abs/1908.03265 + +@article{liu2019radam, + title={On the Variance of the Adaptive Learning Rate and Beyond}, + author={Liu, Liyuan and Jiang, Haoming and He, Pengcheng and Chen, Weizhu and Liu, Xiaodong and Gao, Jianfeng and Han, Jiawei}, + journal={arXiv preprint arXiv:1908.03265}, + year={2019} +} +""" +import math +import torch +from torch.optim.optimizer import Optimizer + + +class RAdam(Optimizer): + + def __init__( + self, + params, + lr=1e-3, + betas=(0.9, 0.999), + eps=1e-8, + weight_decay=0, + degenerated_to_sgd=True, + ): + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError( + "Invalid beta parameter at index 0: {}".format(betas[0]) + ) + if not 0.0 <= betas[1] < 1.0: + raise ValueError( + "Invalid beta parameter at index 1: {}".format(betas[1]) + ) + + self.degenerated_to_sgd = degenerated_to_sgd + defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) + self.buffer = [[None, None, None] for ind in range(10)] + super(RAdam, self).__init__(params, defaults) + + def __setstate__(self, state): + super(RAdam, 
self).__setstate__(state) + + def step(self, closure=None): + + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + + for p in group["params"]: + if p.grad is None: + continue + grad = p.grad.data.float() + if grad.is_sparse: + raise RuntimeError( + "RAdam does not support sparse gradients" + ) + + p_data_fp32 = p.data.float() + + state = self.state[p] + + if len(state) == 0: + state["step"] = 0 + state["exp_avg"] = torch.zeros_like(p_data_fp32) + state["exp_avg_sq"] = torch.zeros_like(p_data_fp32) + else: + state["exp_avg"] = state["exp_avg"].type_as(p_data_fp32) + state["exp_avg_sq"] = state["exp_avg_sq"].type_as( + p_data_fp32 + ) + + exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"] + beta1, beta2 = group["betas"] + + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + exp_avg.mul_(beta1).add_(1 - beta1, grad) + + state["step"] += 1 + buffered = self.buffer[int(state["step"] % 10)] + if state["step"] == buffered[0]: + N_sma, step_size = buffered[1], buffered[2] + else: + buffered[0] = state["step"] + beta2_t = beta2**state["step"] + N_sma_max = 2 / (1-beta2) - 1 + N_sma = N_sma_max - 2 * state["step" + ] * beta2_t / (1-beta2_t) + buffered[1] = N_sma + + # more conservative since it's an approximated value + if N_sma >= 5: + step_size = math.sqrt( + (1-beta2_t) * (N_sma-4) / (N_sma_max-4) * + (N_sma-2) / N_sma * N_sma_max / (N_sma_max-2) + ) / (1 - beta1**state["step"]) + elif self.degenerated_to_sgd: + step_size = 1.0 / (1 - beta1**state["step"]) + else: + step_size = -1 + buffered[2] = step_size + + # more conservative since it's an approximated value + if N_sma >= 5: + if group["weight_decay"] != 0: + p_data_fp32.add_( + -group["weight_decay"] * group["lr"], p_data_fp32 + ) + denom = exp_avg_sq.sqrt().add_(group["eps"]) + p_data_fp32.addcdiv_( + -step_size * group["lr"], exp_avg, denom + ) + p.data.copy_(p_data_fp32) + elif step_size > 0: + if group["weight_decay"] != 0: + p_data_fp32.add_( + -group["weight_decay"] * group["lr"], p_data_fp32 + ) + p_data_fp32.add_(-step_size * group["lr"], exp_avg) + p.data.copy_(p_data_fp32) + + return loss + + +class PlainRAdam(Optimizer): + + def __init__( + self, + params, + lr=1e-3, + betas=(0.9, 0.999), + eps=1e-8, + weight_decay=0, + degenerated_to_sgd=True, + ): + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError( + "Invalid beta parameter at index 0: {}".format(betas[0]) + ) + if not 0.0 <= betas[1] < 1.0: + raise ValueError( + "Invalid beta parameter at index 1: {}".format(betas[1]) + ) + + self.degenerated_to_sgd = degenerated_to_sgd + defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) + + super(PlainRAdam, self).__init__(params, defaults) + + def __setstate__(self, state): + super(PlainRAdam, self).__setstate__(state) + + def step(self, closure=None): + + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + + for p in group["params"]: + if p.grad is None: + continue + grad = p.grad.data.float() + if grad.is_sparse: + raise RuntimeError( + "RAdam does not support sparse gradients" + ) + + p_data_fp32 = p.data.float() + + state = self.state[p] + + if len(state) == 0: + state["step"] = 0 + state["exp_avg"] = torch.zeros_like(p_data_fp32) + state["exp_avg_sq"] = torch.zeros_like(p_data_fp32) + else: + state["exp_avg"] = state["exp_avg"].type_as(p_data_fp32) + 
state["exp_avg_sq"] = state["exp_avg_sq"].type_as( + p_data_fp32 + ) + + exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"] + beta1, beta2 = group["betas"] + + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + exp_avg.mul_(beta1).add_(1 - beta1, grad) + + state["step"] += 1 + beta2_t = beta2**state["step"] + N_sma_max = 2 / (1-beta2) - 1 + N_sma = N_sma_max - 2 * state["step"] * beta2_t / (1-beta2_t) + + # more conservative since it's an approximated value + if N_sma >= 5: + if group["weight_decay"] != 0: + p_data_fp32.add_( + -group["weight_decay"] * group["lr"], p_data_fp32 + ) + step_size = ( + group["lr"] * math.sqrt( + (1-beta2_t) * (N_sma-4) / (N_sma_max-4) * + (N_sma-2) / N_sma * N_sma_max / (N_sma_max-2) + ) / (1 - beta1**state["step"]) + ) + denom = exp_avg_sq.sqrt().add_(group["eps"]) + p_data_fp32.addcdiv_(-step_size, exp_avg, denom) + p.data.copy_(p_data_fp32) + elif self.degenerated_to_sgd: + if group["weight_decay"] != 0: + p_data_fp32.add_( + -group["weight_decay"] * group["lr"], p_data_fp32 + ) + step_size = group["lr"] / (1 - beta1**state["step"]) + p_data_fp32.add_(-step_size, exp_avg) + p.data.copy_(p_data_fp32) + + return loss + + +class AdamW(Optimizer): + + def __init__( + self, + params, + lr=1e-3, + betas=(0.9, 0.999), + eps=1e-8, + weight_decay=0, + warmup=0 + ): + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError( + "Invalid beta parameter at index 0: {}".format(betas[0]) + ) + if not 0.0 <= betas[1] < 1.0: + raise ValueError( + "Invalid beta parameter at index 1: {}".format(betas[1]) + ) + + defaults = dict( + lr=lr, + betas=betas, + eps=eps, + weight_decay=weight_decay, + warmup=warmup + ) + super(AdamW, self).__init__(params, defaults) + + def __setstate__(self, state): + super(AdamW, self).__setstate__(state) + + def step(self, closure=None): + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + + for p in group["params"]: + if p.grad is None: + continue + grad = p.grad.data.float() + if grad.is_sparse: + raise RuntimeError( + "Adam does not support sparse gradients, please consider SparseAdam instead" + ) + + p_data_fp32 = p.data.float() + + state = self.state[p] + + if len(state) == 0: + state["step"] = 0 + state["exp_avg"] = torch.zeros_like(p_data_fp32) + state["exp_avg_sq"] = torch.zeros_like(p_data_fp32) + else: + state["exp_avg"] = state["exp_avg"].type_as(p_data_fp32) + state["exp_avg_sq"] = state["exp_avg_sq"].type_as( + p_data_fp32 + ) + + exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"] + beta1, beta2 = group["betas"] + + state["step"] += 1 + + exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) + exp_avg.mul_(beta1).add_(1 - beta1, grad) + + denom = exp_avg_sq.sqrt().add_(group["eps"]) + bias_correction1 = 1 - beta1**state["step"] + bias_correction2 = 1 - beta2**state["step"] + + if group["warmup"] > state["step"]: + scheduled_lr = 1e-8 + state["step"] * group["lr"] / group[ + "warmup"] + else: + scheduled_lr = group["lr"] + + step_size = ( + scheduled_lr * math.sqrt(bias_correction2) / + bias_correction1 + ) + + if group["weight_decay"] != 0: + p_data_fp32.add_( + -group["weight_decay"] * scheduled_lr, p_data_fp32 + ) + + p_data_fp32.addcdiv_(-step_size, exp_avg, denom) + + p.data.copy_(p_data_fp32) + + return loss diff --git a/Dassl.pytorch/dassl/utils/__init__.py b/Dassl.pytorch/dassl/utils/__init__.py new file mode 
100644 index 0000000000000000000000000000000000000000..c47679fda46f1f049313e1ff5866f97d4cf41485 --- /dev/null +++ b/Dassl.pytorch/dassl/utils/__init__.py @@ -0,0 +1,5 @@ +from .tools import * +from .logger import * +from .meters import * +from .registry import * +from .torchtools import * diff --git a/Dassl.pytorch/dassl/utils/logger.py b/Dassl.pytorch/dassl/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..d5e680a0e4adff730d67bd7aec65e983ab2423d4 --- /dev/null +++ b/Dassl.pytorch/dassl/utils/logger.py @@ -0,0 +1,73 @@ +import os +import sys +import time +import os.path as osp + +from .tools import mkdir_if_missing + +__all__ = ["Logger", "setup_logger"] + + +class Logger: + """Write console output to external text file. + + Imported from ``_ + + Args: + fpath (str): directory to save logging file. + + Examples:: + >>> import sys + >>> import os.path as osp + >>> save_dir = 'output/experiment-1' + >>> log_name = 'train.log' + >>> sys.stdout = Logger(osp.join(save_dir, log_name)) + """ + + def __init__(self, fpath=None): + self.console = sys.stdout + self.file = None + if fpath is not None: + mkdir_if_missing(osp.dirname(fpath)) + self.file = open(fpath, "w") + + def __del__(self): + self.close() + + def __enter__(self): + pass + + def __exit__(self, *args): + self.close() + + def write(self, msg): + self.console.write(msg) + if self.file is not None: + self.file.write(msg) + + def flush(self): + self.console.flush() + if self.file is not None: + self.file.flush() + os.fsync(self.file.fileno()) + + def close(self): + self.console.close() + if self.file is not None: + self.file.close() + + +def setup_logger(output=None): + if output is None: + return + + if output.endswith(".txt") or output.endswith(".log"): + fpath = output + else: + fpath = osp.join(output, "log.txt") + + if osp.exists(fpath): + # make sure the existing log file is not over-written + fpath += time.strftime("-%Y-%m-%d-%H-%M-%S") + + sys.stdout = Logger(fpath) diff --git a/Dassl.pytorch/dassl/utils/meters.py b/Dassl.pytorch/dassl/utils/meters.py new file mode 100644 index 0000000000000000000000000000000000000000..a779b5915386a47a62e3a44d8260cc8d79a0c634 --- /dev/null +++ b/Dassl.pytorch/dassl/utils/meters.py @@ -0,0 +1,80 @@ +from collections import defaultdict +import torch + +__all__ = ["AverageMeter", "MetricMeter"] + + +class AverageMeter: + """Compute and store the average and current value. + + Examples:: + >>> # 1. Initialize a meter to record loss + >>> losses = AverageMeter() + >>> # 2. Update meter after every mini-batch update + >>> losses.update(loss_value, batch_size) + """ + + def __init__(self, ema=False): + """ + Args: + ema (bool, optional): apply exponential moving average. + """ + self.ema = ema + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if isinstance(val, torch.Tensor): + val = val.item() + + self.val = val + self.sum += val * n + self.count += n + + if self.ema: + self.avg = self.avg * 0.9 + self.val * 0.1 + else: + self.avg = self.sum / self.count + + +class MetricMeter: + """Store the average and current value for a set of metrics. + + Examples:: + >>> # 1. Create an instance of MetricMeter + >>> metric = MetricMeter() + >>> # 2. Update using a dictionary as input + >>> input_dict = {'loss_1': value_1, 'loss_2': value_2} + >>> metric.update(input_dict) + >>> # 3. 
Convert to string and print + >>> print(str(metric)) + """ + + def __init__(self, delimiter=" "): + self.meters = defaultdict(AverageMeter) + self.delimiter = delimiter + + def update(self, input_dict): + if input_dict is None: + return + + if not isinstance(input_dict, dict): + raise TypeError( + "Input to MetricMeter.update() must be a dictionary" + ) + + for k, v in input_dict.items(): + if isinstance(v, torch.Tensor): + v = v.item() + self.meters[k].update(v) + + def __str__(self): + output_str = [] + for name, meter in self.meters.items(): + output_str.append(f"{name} {meter.val:.4f} ({meter.avg:.4f})") + return self.delimiter.join(output_str) diff --git a/Dassl.pytorch/dassl/utils/registry.py b/Dassl.pytorch/dassl/utils/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..5079784e6155f1888803b3960576ceb972411ab9 --- /dev/null +++ b/Dassl.pytorch/dassl/utils/registry.py @@ -0,0 +1,69 @@ +""" +Modified from https://github.com/facebookresearch/fvcore +""" +__all__ = ["Registry"] + + +class Registry: + """A registry providing name -> object mapping, to support + custom modules. + + To create a registry (e.g. a backbone registry): + + .. code-block:: python + + BACKBONE_REGISTRY = Registry('BACKBONE') + + To register an object: + + .. code-block:: python + + @BACKBONE_REGISTRY.register() + class MyBackbone(nn.Module): + ... + + Or: + + .. code-block:: python + + BACKBONE_REGISTRY.register(MyBackbone) + """ + + def __init__(self, name): + self._name = name + self._obj_map = dict() + + def _do_register(self, name, obj, force=False): + if name in self._obj_map and not force: + raise KeyError( + 'An object named "{}" was already ' + 'registered in "{}" registry'.format(name, self._name) + ) + + self._obj_map[name] = obj + + def register(self, obj=None, force=False): + if obj is None: + # Used as a decorator + def wrapper(fn_or_class): + name = fn_or_class.__name__ + self._do_register(name, fn_or_class, force=force) + return fn_or_class + + return wrapper + + # Used as a function call + name = obj.__name__ + self._do_register(name, obj, force=force) + + def get(self, name): + if name not in self._obj_map: + raise KeyError( + 'Object name "{}" does not exist ' + 'in "{}" registry'.format(name, self._name) + ) + + return self._obj_map[name] + + def registered_names(self): + return list(self._obj_map.keys()) diff --git a/Dassl.pytorch/dassl/utils/tools.py b/Dassl.pytorch/dassl/utils/tools.py new file mode 100644 index 0000000000000000000000000000000000000000..62d4f30746a2d387fc3a97f4e6f67a3227a26c28 --- /dev/null +++ b/Dassl.pytorch/dassl/utils/tools.py @@ -0,0 +1,185 @@ +""" +Modified from https://github.com/KaiyangZhou/deep-person-reid +""" +import os +import sys +import json +import time +import errno +import numpy as np +import random +import os.path as osp +import warnings +from difflib import SequenceMatcher +import PIL +import torch +from PIL import Image + +__all__ = [ + "mkdir_if_missing", + "check_isfile", + "read_json", + "write_json", + "set_random_seed", + "download_url", + "read_image", + "collect_env_info", + "listdir_nohidden", + "get_most_similar_str_to_a_from_b", + "check_availability", + "tolist_if_not", +] + + +def mkdir_if_missing(dirname): + """Create dirname if it is missing.""" + if not osp.exists(dirname): + try: + os.makedirs(dirname) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def check_isfile(fpath): + """Check if the given path is a file. + + Args: + fpath (str): file path. 
+ + Returns: + bool + """ + isfile = osp.isfile(fpath) + if not isfile: + warnings.warn('No file found at "{}"'.format(fpath)) + return isfile + + +def read_json(fpath): + """Read json file from a path.""" + with open(fpath, "r") as f: + obj = json.load(f) + return obj + + +def write_json(obj, fpath): + """Writes to a json file.""" + mkdir_if_missing(osp.dirname(fpath)) + with open(fpath, "w") as f: + json.dump(obj, f, indent=4, separators=(",", ": ")) + + +def set_random_seed(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + +def download_url(url, dst): + """Download file from a url to a destination. + + Args: + url (str): url to download file. + dst (str): destination path. + """ + from six.moves import urllib + + print('* url="{}"'.format(url)) + print('* destination="{}"'.format(dst)) + + def _reporthook(count, block_size, total_size): + global start_time + if count == 0: + start_time = time.time() + return + duration = time.time() - start_time + progress_size = int(count * block_size) + speed = int(progress_size / (1024*duration)) + percent = int(count * block_size * 100 / total_size) + sys.stdout.write( + "\r...%d%%, %d MB, %d KB/s, %d seconds passed" % + (percent, progress_size / (1024*1024), speed, duration) + ) + sys.stdout.flush() + + urllib.request.urlretrieve(url, dst, _reporthook) + sys.stdout.write("\n") + + +def read_image(path): + """Read image from path using ``PIL.Image``. + + Args: + path (str): path to an image. + + Returns: + PIL image + """ + return Image.open(path).convert("RGB") + + +def collect_env_info(): + """Return env info as a string. + + Code source: github.com/facebookresearch/maskrcnn-benchmark + """ + from torch.utils.collect_env import get_pretty_env_info + + env_str = get_pretty_env_info() + env_str += "\n Pillow ({})".format(PIL.__version__) + return env_str + + +def listdir_nohidden(path, sort=False): + """List non-hidden items in a directory. + + Args: + path (str): directory path. + sort (bool): sort the items. + """ + items = [f for f in os.listdir(path) if not f.startswith(".")] + if sort: + items.sort() + return items + + +def get_most_similar_str_to_a_from_b(a, b): + """Return the most similar string to a in b. + + Args: + a (str): probe string. + b (list): a list of candidate strings. + """ + highest_sim = 0 + chosen = None + for candidate in b: + sim = SequenceMatcher(None, a, candidate).ratio() + if sim >= highest_sim: + highest_sim = sim + chosen = candidate + return chosen + + +def check_availability(requested, available): + """Check if an element is available in a list. + + Args: + requested (str): probe string. + available (list): a list of available strings. 
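    Example (illustrative strings)::
        >>> check_availability('resnet18', ['resnet18', 'resnet50'])  # passes silently
        >>> check_availability('resent18', ['resnet18', 'resnet50'])  # raises ValueError, suggesting 'resnet18'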
+ """ + if requested not in available: + psb_ans = get_most_similar_str_to_a_from_b(requested, available) + raise ValueError( + "The requested one is expected " + "to belong to {}, but got [{}] " + "(do you mean [{}]?)".format(available, requested, psb_ans) + ) + + +def tolist_if_not(x): + """Convert to a list.""" + if not isinstance(x, list): + x = [x] + return x diff --git a/Dassl.pytorch/dassl/utils/torchtools.py b/Dassl.pytorch/dassl/utils/torchtools.py new file mode 100644 index 0000000000000000000000000000000000000000..2e14fa31fa4c02702c738b5889fa003e51e00a98 --- /dev/null +++ b/Dassl.pytorch/dassl/utils/torchtools.py @@ -0,0 +1,347 @@ +""" +Modified from https://github.com/KaiyangZhou/deep-person-reid +""" +import pickle +import shutil +import os.path as osp +import warnings +from functools import partial +from collections import OrderedDict +import torch +import torch.nn as nn + +from .tools import mkdir_if_missing + +__all__ = [ + "save_checkpoint", + "load_checkpoint", + "resume_from_checkpoint", + "open_all_layers", + "open_specified_layers", + "count_num_param", + "load_pretrained_weights", + "init_network_weights", +] + + +def save_checkpoint( + state, + save_dir, + is_best=False, + remove_module_from_keys=True, + model_name="" +): + r"""Save checkpoint. + + Args: + state (dict): dictionary. + save_dir (str): directory to save checkpoint. + is_best (bool, optional): if True, this checkpoint will be copied and named + ``model-best.pth.tar``. Default is False. + remove_module_from_keys (bool, optional): whether to remove "module." + from layer names. Default is True. + model_name (str, optional): model name to save. + """ + mkdir_if_missing(save_dir) + + if remove_module_from_keys: + # remove 'module.' in state_dict's keys + state_dict = state["state_dict"] + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith("module."): + k = k[7:] + new_state_dict[k] = v + state["state_dict"] = new_state_dict + + # save model + epoch = state["epoch"] + if not model_name: + model_name = "model.pth.tar-" + str(epoch) + fpath = osp.join(save_dir, model_name) + torch.save(state, fpath) + print(f"Checkpoint saved to {fpath}") + + # save current model name + checkpoint_file = osp.join(save_dir, "checkpoint") + checkpoint = open(checkpoint_file, "w+") + checkpoint.write("{}\n".format(osp.basename(fpath))) + checkpoint.close() + + if is_best: + best_fpath = osp.join(osp.dirname(fpath), "model-best.pth.tar") + shutil.copy(fpath, best_fpath) + print('Best checkpoint saved to "{}"'.format(best_fpath)) + + +def load_checkpoint(fpath): + r"""Load checkpoint. + + ``UnicodeDecodeError`` can be well handled, which means + python2-saved files can be read from python3. + + Args: + fpath (str): path to checkpoint. 
+ + Returns: + dict + + Examples:: + >>> fpath = 'log/my_model/model.pth.tar-10' + >>> checkpoint = load_checkpoint(fpath) + """ + if fpath is None: + raise ValueError("File path is None") + + if not osp.exists(fpath): + raise FileNotFoundError('File is not found at "{}"'.format(fpath)) + + map_location = None if torch.cuda.is_available() else "cpu" + + try: + checkpoint = torch.load(fpath, map_location=map_location) + + except UnicodeDecodeError: + pickle.load = partial(pickle.load, encoding="latin1") + pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1") + checkpoint = torch.load( + fpath, pickle_module=pickle, map_location=map_location + ) + + except Exception: + print('Unable to load checkpoint from "{}"'.format(fpath)) + raise + + return checkpoint + + +def resume_from_checkpoint(fdir, model, optimizer=None, scheduler=None): + r"""Resume training from a checkpoint. + + This will load (1) model weights and (2) ``state_dict`` + of optimizer if ``optimizer`` is not None. + + Args: + fdir (str): directory where the model was saved. + model (nn.Module): model. + optimizer (Optimizer, optional): an Optimizer. + scheduler (Scheduler, optional): an Scheduler. + + Returns: + int: start_epoch. + + Examples:: + >>> fdir = 'log/my_model' + >>> start_epoch = resume_from_checkpoint(fdir, model, optimizer, scheduler) + """ + with open(osp.join(fdir, "checkpoint"), "r") as checkpoint: + model_name = checkpoint.readlines()[0].strip("\n") + fpath = osp.join(fdir, model_name) + + print('Loading checkpoint from "{}"'.format(fpath)) + checkpoint = load_checkpoint(fpath) + model.load_state_dict(checkpoint["state_dict"]) + print("Loaded model weights") + + if optimizer is not None and "optimizer" in checkpoint.keys(): + optimizer.load_state_dict(checkpoint["optimizer"]) + print("Loaded optimizer") + + if scheduler is not None and "scheduler" in checkpoint.keys(): + scheduler.load_state_dict(checkpoint["scheduler"]) + print("Loaded scheduler") + + start_epoch = checkpoint["epoch"] + print("Previous epoch: {}".format(start_epoch)) + + return start_epoch + + +def adjust_learning_rate( + optimizer, + base_lr, + epoch, + stepsize=20, + gamma=0.1, + linear_decay=False, + final_lr=0, + max_epoch=100, +): + r"""Adjust learning rate. + + Deprecated. + """ + if linear_decay: + # linearly decay learning rate from base_lr to final_lr + frac_done = epoch / max_epoch + lr = frac_done*final_lr + (1.0-frac_done) * base_lr + else: + # decay learning rate by gamma for every stepsize + lr = base_lr * (gamma**(epoch // stepsize)) + + for param_group in optimizer.param_groups: + param_group["lr"] = lr + + +def set_bn_to_eval(m): + r"""Set BatchNorm layers to eval mode.""" + # 1. no update for running mean and var + # 2. scale and shift parameters are still trainable + classname = m.__class__.__name__ + if classname.find("BatchNorm") != -1: + m.eval() + + +def open_all_layers(model): + r"""Open all layers in model for training. + + Examples:: + >>> open_all_layers(model) + """ + model.train() + for p in model.parameters(): + p.requires_grad = True + + +def open_specified_layers(model, open_layers): + r"""Open specified layers in model for training while keeping + other layers frozen. + + Args: + model (nn.Module): neural net model. + open_layers (str or list): layers open for training. + + Examples:: + >>> # Only model.classifier will be updated. + >>> open_layers = 'classifier' + >>> open_specified_layers(model, open_layers) + >>> # Only model.fc and model.classifier will be updated. 
+ >>> open_layers = ['fc', 'classifier'] + >>> open_specified_layers(model, open_layers) + """ + if isinstance(model, nn.DataParallel): + model = model.module + + if isinstance(open_layers, str): + open_layers = [open_layers] + + for layer in open_layers: + assert hasattr(model, layer), f"{layer} is not an attribute" + + for name, module in model.named_children(): + if name in open_layers: + module.train() + for p in module.parameters(): + p.requires_grad = True + else: + module.eval() + for p in module.parameters(): + p.requires_grad = False + + +def count_num_param(model=None, params=None): + r"""Count number of parameters in a model. + + Args: + model (nn.Module): network model. + params: network model`s params. + Examples:: + >>> model_size = count_num_param(model) + """ + + if model is not None: + return sum(p.numel() for p in model.parameters()) + + if params is not None: + s = 0 + for p in params: + if isinstance(p, dict): + s += p["params"].numel() + else: + s += p.numel() + return s + + raise ValueError("model and params must provide at least one.") + + +def load_pretrained_weights(model, weight_path): + r"""Load pretrianed weights to model. + + Features:: + - Incompatible layers (unmatched in name or size) will be ignored. + - Can automatically deal with keys containing "module.". + + Args: + model (nn.Module): network model. + weight_path (str): path to pretrained weights. + + Examples:: + >>> weight_path = 'log/my_model/model-best.pth.tar' + >>> load_pretrained_weights(model, weight_path) + """ + checkpoint = load_checkpoint(weight_path) + if "state_dict" in checkpoint: + state_dict = checkpoint["state_dict"] + else: + state_dict = checkpoint + + model_dict = model.state_dict() + new_state_dict = OrderedDict() + matched_layers, discarded_layers = [], [] + + for k, v in state_dict.items(): + if k.startswith("module."): + k = k[7:] # discard module. 
+ + if k in model_dict and model_dict[k].size() == v.size(): + new_state_dict[k] = v + matched_layers.append(k) + else: + discarded_layers.append(k) + + model_dict.update(new_state_dict) + model.load_state_dict(model_dict) + + if len(matched_layers) == 0: + warnings.warn( + f"Cannot load {weight_path} (check the key names manually)" + ) + else: + print(f"Successfully loaded pretrained weights from {weight_path}") + if len(discarded_layers) > 0: + print( + f"Layers discarded due to unmatched keys or size: {discarded_layers}" + ) + + +def init_network_weights(model, init_type="normal", gain=0.02): + + def _init_func(m): + classname = m.__class__.__name__ + + if hasattr(m, "weight") and ( + classname.find("Conv") != -1 or classname.find("Linear") != -1 + ): + if init_type == "normal": + nn.init.normal_(m.weight.data, 0.0, gain) + elif init_type == "xavier": + nn.init.xavier_normal_(m.weight.data, gain=gain) + elif init_type == "kaiming": + nn.init.kaiming_normal_(m.weight.data, a=0, mode="fan_in") + elif init_type == "orthogonal": + nn.init.orthogonal_(m.weight.data, gain=gain) + else: + raise NotImplementedError + if hasattr(m, "bias") and m.bias is not None: + nn.init.constant_(m.bias.data, 0.0) + + elif classname.find("BatchNorm") != -1: + nn.init.constant_(m.weight.data, 1.0) + nn.init.constant_(m.bias.data, 0.0) + + elif classname.find("InstanceNorm") != -1: + if m.weight is not None and m.bias is not None: + nn.init.constant_(m.weight.data, 1.0) + nn.init.constant_(m.bias.data, 0.0) + + model.apply(_init_func) diff --git a/Dassl.pytorch/datasets/da/cifar_stl.py b/Dassl.pytorch/datasets/da/cifar_stl.py new file mode 100644 index 0000000000000000000000000000000000000000..52c16aad44e0bd10adc3dbd08c7641905bf21bcb --- /dev/null +++ b/Dassl.pytorch/datasets/da/cifar_stl.py @@ -0,0 +1,95 @@ +import sys +import pprint as pp +import os.path as osp +from torchvision.datasets import STL10, CIFAR10 + +from dassl.utils import mkdir_if_missing + +cifar_label2name = { + 0: "airplane", + 1: "car", # the original name was 'automobile' + 2: "bird", + 3: "cat", + 4: "deer", + 5: "dog", + 6: "frog", # conflict class + 7: "horse", + 8: "ship", + 9: "truck", +} + +stl_label2name = { + 0: "airplane", + 1: "bird", + 2: "car", + 3: "cat", + 4: "deer", + 5: "dog", + 6: "horse", + 7: "monkey", # conflict class + 8: "ship", + 9: "truck", +} + +new_name2label = { + "airplane": 0, + "bird": 1, + "car": 2, + "cat": 3, + "deer": 4, + "dog": 5, + "horse": 6, + "ship": 7, + "truck": 8, +} + + +def extract_and_save_image(dataset, save_dir, discard, label2name): + if osp.exists(save_dir): + print('Folder "{}" already exists'.format(save_dir)) + return + + print('Extracting images to "{}" ...'.format(save_dir)) + mkdir_if_missing(save_dir) + + for i in range(len(dataset)): + img, label = dataset[i] + if label == discard: + continue + class_name = label2name[label] + label_new = new_name2label[class_name] + class_dir = osp.join( + save_dir, + str(label_new).zfill(3) + "_" + class_name + ) + mkdir_if_missing(class_dir) + impath = osp.join(class_dir, str(i + 1).zfill(5) + ".jpg") + img.save(impath) + + +def download_and_prepare(name, root, discarded_label, label2name): + print("Dataset: {}".format(name)) + print("Root: {}".format(root)) + print("Old labels:") + pp.pprint(label2name) + print("Discarded label: {}".format(discarded_label)) + print("New labels:") + pp.pprint(new_name2label) + + if name == "cifar": + train = CIFAR10(root, train=True, download=True) + test = CIFAR10(root, train=False) + else: + train = STL10(root, 
split="train", download=True) + test = STL10(root, split="test") + + train_dir = osp.join(root, name, "train") + test_dir = osp.join(root, name, "test") + + extract_and_save_image(train, train_dir, discarded_label, label2name) + extract_and_save_image(test, test_dir, discarded_label, label2name) + + +if __name__ == "__main__": + download_and_prepare("cifar", sys.argv[1], 6, cifar_label2name) + download_and_prepare("stl", sys.argv[1], 7, stl_label2name) diff --git a/Dassl.pytorch/datasets/da/digit5.py b/Dassl.pytorch/datasets/da/digit5.py new file mode 100644 index 0000000000000000000000000000000000000000..500511dc8bf118e32f750271505e786513038fa6 --- /dev/null +++ b/Dassl.pytorch/datasets/da/digit5.py @@ -0,0 +1,131 @@ +import os +import numpy as np +import os.path as osp +import argparse +from PIL import Image +from scipy.io import loadmat + + +def mkdir_if_missing(directory): + if not osp.exists(directory): + os.makedirs(directory) + + +def extract_and_save(data, label, save_dir): + for i, (x, y) in enumerate(zip(data, label)): + if x.shape[2] == 1: + x = np.repeat(x, 3, axis=2) + if y == 10: + y = 0 + x = Image.fromarray(x, mode="RGB") + save_path = osp.join( + save_dir, + str(i + 1).zfill(6) + "_" + str(y) + ".jpg" + ) + x.save(save_path) + + +def load_mnist(data_dir, raw_data_dir): + filepath = osp.join(raw_data_dir, "mnist_data.mat") + data = loadmat(filepath) + + train_data = np.reshape(data["train_32"], (55000, 32, 32, 1)) + test_data = np.reshape(data["test_32"], (10000, 32, 32, 1)) + + train_label = np.nonzero(data["label_train"])[1] + test_label = np.nonzero(data["label_test"])[1] + + return train_data, test_data, train_label, test_label + + +def load_mnist_m(data_dir, raw_data_dir): + filepath = osp.join(raw_data_dir, "mnistm_with_label.mat") + data = loadmat(filepath) + + train_data = data["train"] + test_data = data["test"] + + train_label = np.nonzero(data["label_train"])[1] + test_label = np.nonzero(data["label_test"])[1] + + return train_data, test_data, train_label, test_label + + +def load_svhn(data_dir, raw_data_dir): + train = loadmat(osp.join(raw_data_dir, "svhn_train_32x32.mat")) + train_data = train["X"].transpose(3, 0, 1, 2) + train_label = train["y"][:, 0] + + test = loadmat(osp.join(raw_data_dir, "svhn_test_32x32.mat")) + test_data = test["X"].transpose(3, 0, 1, 2) + test_label = test["y"][:, 0] + + return train_data, test_data, train_label, test_label + + +def load_syn(data_dir, raw_data_dir): + filepath = osp.join(raw_data_dir, "syn_number.mat") + data = loadmat(filepath) + + train_data = data["train_data"] + test_data = data["test_data"] + + train_label = data["train_label"][:, 0] + test_label = data["test_label"][:, 0] + + return train_data, test_data, train_label, test_label + + +def load_usps(data_dir, raw_data_dir): + filepath = osp.join(raw_data_dir, "usps_28x28.mat") + data = loadmat(filepath)["dataset"] + + train_data = data[0][0].transpose(0, 2, 3, 1) + test_data = data[1][0].transpose(0, 2, 3, 1) + + train_data *= 255 + test_data *= 255 + + train_data = train_data.astype(np.uint8) + test_data = test_data.astype(np.uint8) + + train_label = data[0][1][:, 0] + test_label = data[1][1][:, 0] + + return train_data, test_data, train_label, test_label + + +def main(data_dir): + data_dir = osp.abspath(osp.expanduser(data_dir)) + raw_data_dir = osp.join(data_dir, "Digit-Five") + + if not osp.exists(data_dir): + raise FileNotFoundError('"{}" does not exist'.format(data_dir)) + + datasets = ["mnist", "mnist_m", "svhn", "syn", "usps"] + + for name in datasets: + 
print("Creating {}".format(name)) + + output = eval("load_" + name)(data_dir, raw_data_dir) + train_data, test_data, train_label, test_label = output + + print("# train: {}".format(train_data.shape[0])) + print("# test: {}".format(test_data.shape[0])) + + train_dir = osp.join(data_dir, name, "train_images") + mkdir_if_missing(train_dir) + test_dir = osp.join(data_dir, name, "test_images") + mkdir_if_missing(test_dir) + + extract_and_save(train_data, train_label, train_dir) + extract_and_save(test_data, test_label, test_dir) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "data_dir", type=str, help="directory containing Digit-Five/" + ) + args = parser.parse_args() + main(args.data_dir) diff --git a/Dassl.pytorch/datasets/da/visda17.sh b/Dassl.pytorch/datasets/da/visda17.sh new file mode 100644 index 0000000000000000000000000000000000000000..ce98d313c7a43f39bbf02d694d5d50d50fed06b1 --- /dev/null +++ b/Dassl.pytorch/datasets/da/visda17.sh @@ -0,0 +1,24 @@ +# ------------------------------------------------------------------------ +# ROOT is the root directory where you put your domain datasets. +# +# Suppose you wanna put the dataset under $DATA, which stores all the +# domain datasets, run the following command in your terminal to +# download VisDa17: +# +# $ sh visda17.sh $DATA +#------------------------------------------------------------------------ + +ROOT=$1 +mkdir $ROOT/visda17 +cd $ROOT/visda17 + +wget http://csr.bu.edu/ftp/visda17/clf/train.tar +tar xvf train.tar + +wget http://csr.bu.edu/ftp/visda17/clf/validation.tar +tar xvf validation.tar + +wget http://csr.bu.edu/ftp/visda17/clf/test.tar +tar xvf test.tar + +wget https://raw.githubusercontent.com/VisionLearningGroup/taskcv-2017-public/master/classification/data/image_list.txt -O test/image_list.txt \ No newline at end of file diff --git a/Dassl.pytorch/datasets/dg/cifar_c.py b/Dassl.pytorch/datasets/dg/cifar_c.py new file mode 100644 index 0000000000000000000000000000000000000000..f407f85850e13c204250ba55868cfcd24359d9c2 --- /dev/null +++ b/Dassl.pytorch/datasets/dg/cifar_c.py @@ -0,0 +1,73 @@ +""" +This script +- creates a folder named "cifar10_c" under the same directory as 'CIFAR-10-C' +- extracts images from .npy files and save them as .jpg. 
+""" +import os +import sys +import numpy as np +import os.path as osp +from PIL import Image + +from dassl.utils import mkdir_if_missing + + +def extract_and_save(images, labels, level, dst): + # level denotes the corruption intensity level (0-based) + assert 0 <= level <= 4 + + for i in range(10000): + real_i = i + level*10000 + im = Image.fromarray(images[real_i]) + label = int(labels[real_i]) + category_dir = osp.join(dst, str(label).zfill(3)) + mkdir_if_missing(category_dir) + save_path = osp.join(category_dir, str(i + 1).zfill(5) + ".jpg") + im.save(save_path) + + +def main(npy_folder): + npy_folder = osp.abspath(osp.expanduser(npy_folder)) + dataset_cap = osp.basename(npy_folder) + + assert dataset_cap in ["CIFAR-10-C", "CIFAR-100-C"] + + if dataset_cap == "CIFAR-10-C": + dataset = "cifar10_c" + else: + dataset = "cifar100_c" + + if not osp.exists(npy_folder): + print('The given folder "{}" does not exist'.format(npy_folder)) + + root = osp.dirname(npy_folder) + im_folder = osp.join(root, dataset) + + mkdir_if_missing(im_folder) + + dirnames = os.listdir(npy_folder) + dirnames.remove("labels.npy") + if "README.txt" in dirnames: + dirnames.remove("README.txt") + assert len(dirnames) == 19 + labels = np.load(osp.join(npy_folder, "labels.npy")) + + for dirname in dirnames: + corruption = dirname.split(".")[0] + corruption_folder = osp.join(im_folder, corruption) + mkdir_if_missing(corruption_folder) + + npy_filename = osp.join(npy_folder, dirname) + images = np.load(npy_filename) + assert images.shape[0] == 50000 + + for level in range(5): + dst = osp.join(corruption_folder, str(level + 1)) + mkdir_if_missing(dst) + print('Saving images to "{}"'.format(dst)) + extract_and_save(images, labels, level, dst) + + +if __name__ == "__main__": + # sys.argv[1] contains the path to CIFAR-10-C or CIFAR-100-C + main(sys.argv[1]) diff --git a/Dassl.pytorch/datasets/ssl/cifar10_cifar100_svhn.py b/Dassl.pytorch/datasets/ssl/cifar10_cifar100_svhn.py new file mode 100644 index 0000000000000000000000000000000000000000..ad9aa11cb4d6b7da8e36090a2a1713f4469a78f1 --- /dev/null +++ b/Dassl.pytorch/datasets/ssl/cifar10_cifar100_svhn.py @@ -0,0 +1,50 @@ +import sys +import os.path as osp +from torchvision.datasets import SVHN, CIFAR10, CIFAR100 + +from dassl.utils import mkdir_if_missing + + +def extract_and_save_image(dataset, save_dir): + if osp.exists(save_dir): + print('Folder "{}" already exists'.format(save_dir)) + return + + print('Extracting images to "{}" ...'.format(save_dir)) + mkdir_if_missing(save_dir) + + for i in range(len(dataset)): + img, label = dataset[i] + class_dir = osp.join(save_dir, str(label).zfill(3)) + mkdir_if_missing(class_dir) + impath = osp.join(class_dir, str(i + 1).zfill(5) + ".jpg") + img.save(impath) + + +def download_and_prepare(name, root): + print("Dataset: {}".format(name)) + print("Root: {}".format(root)) + + if name == "cifar10": + train = CIFAR10(root, train=True, download=True) + test = CIFAR10(root, train=False) + elif name == "cifar100": + train = CIFAR100(root, train=True, download=True) + test = CIFAR100(root, train=False) + elif name == "svhn": + train = SVHN(root, split="train", download=True) + test = SVHN(root, split="test", download=True) + else: + raise ValueError + + train_dir = osp.join(root, name, "train") + test_dir = osp.join(root, name, "test") + + extract_and_save_image(train, train_dir) + extract_and_save_image(test, test_dir) + + +if __name__ == "__main__": + download_and_prepare("cifar10", sys.argv[1]) + download_and_prepare("cifar100", 
sys.argv[1]) + download_and_prepare("svhn", sys.argv[1]) diff --git a/Dassl.pytorch/datasets/ssl/stl10.py b/Dassl.pytorch/datasets/ssl/stl10.py new file mode 100644 index 0000000000000000000000000000000000000000..3f2ed2cb8c589a9281583e0b3094f2460322b9eb --- /dev/null +++ b/Dassl.pytorch/datasets/ssl/stl10.py @@ -0,0 +1,42 @@ +import sys +import os.path as osp +from torchvision.datasets import STL10 + +from dassl.utils import mkdir_if_missing + + +def extract_and_save_image(dataset, save_dir): + if osp.exists(save_dir): + print('Folder "{}" already exists'.format(save_dir)) + return + + print('Extracting images to "{}" ...'.format(save_dir)) + mkdir_if_missing(save_dir) + + for i in range(len(dataset)): + img, label = dataset[i] + if label == -1: + label_name = "none" + else: + label_name = str(label) + imname = str(i).zfill(6) + "_" + label_name + ".jpg" + impath = osp.join(save_dir, imname) + img.save(impath) + + +def download_and_prepare(root): + train = STL10(root, split="train", download=True) + test = STL10(root, split="test") + unlabeled = STL10(root, split="unlabeled") + + train_dir = osp.join(root, "train") + test_dir = osp.join(root, "test") + unlabeled_dir = osp.join(root, "unlabeled") + + extract_and_save_image(train, train_dir) + extract_and_save_image(test, test_dir) + extract_and_save_image(unlabeled, unlabeled_dir) + + +if __name__ == "__main__": + download_and_prepare(sys.argv[1]) diff --git a/Dassl.pytorch/linter.sh b/Dassl.pytorch/linter.sh new file mode 100644 index 0000000000000000000000000000000000000000..9db34f9f86e44fad1263758a6ccaf09eb816c11a --- /dev/null +++ b/Dassl.pytorch/linter.sh @@ -0,0 +1,11 @@ +echo "Running isort" +isort -y -sp . +echo "Done" + +echo "Running yapf" +yapf -i -r -vv -e build . +echo "Done" + +echo "Running flake8" +flake8 . 
+echo "Done" \ No newline at end of file diff --git a/Dassl.pytorch/requirements.txt b/Dassl.pytorch/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..d8dbbdfbfa4dae18f63dc2861550040b9b0ebe77 --- /dev/null +++ b/Dassl.pytorch/requirements.txt @@ -0,0 +1,14 @@ +flake8==3.7.9 +yapf==0.29.0 +isort==4.3.21 +yacs +gdown +tb-nightly +future +scipy +scikit-learn +tqdm +ftfy +regex +wilds==1.2.2 +tabulate diff --git a/Dassl.pytorch/setup.py b/Dassl.pytorch/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..b0cbe47b3e0e9bd714e16af1cacea793bdcc4b33 --- /dev/null +++ b/Dassl.pytorch/setup.py @@ -0,0 +1,48 @@ +import numpy as np +import os.path as osp +from setuptools import setup, find_packages + + +def readme(): + with open('README.md') as f: + content = f.read() + return content + + +def find_version(): + version_file = 'dassl/__init__.py' + with open(version_file, 'r') as f: + exec(compile(f.read(), version_file, 'exec')) + return locals()['__version__'] + + +def numpy_include(): + try: + numpy_include = np.get_include() + except AttributeError: + numpy_include = np.get_numpy_include() + return numpy_include + + +def get_requirements(filename='requirements.txt'): + here = osp.dirname(osp.realpath(__file__)) + with open(osp.join(here, filename), 'r') as f: + requires = [line.replace('\n', '') for line in f.readlines()] + return requires + + +setup( + name='dassl', + version=find_version(), + description='Dassl: Domain adaptation and semi-supervised learning', + author='Kaiyang Zhou', + license='MIT', + long_description=readme(), + url='https://github.com/KaiyangZhou/Dassl.pytorch', + packages=find_packages(), + install_requires=get_requirements(), + keywords=[ + 'Domain Adaptation', 'Domain Generalization', + 'Semi-Supervised Learning', 'Pytorch' + ] +) diff --git a/Dassl.pytorch/tools/parse_test_res.py b/Dassl.pytorch/tools/parse_test_res.py new file mode 100644 index 0000000000000000000000000000000000000000..d5105adda4e5385c9439a636acbb1d9cc07dbc0c --- /dev/null +++ b/Dassl.pytorch/tools/parse_test_res.py @@ -0,0 +1,178 @@ +""" +Goal +--- +1. Read test results from log.txt files +2. Compute mean and std across different folders (seeds) + +Usage +--- +Assume the output files are saved under output/my_experiment, +which contains results of different seeds, e.g., + +my_experiment/ + seed1/ + log.txt + seed2/ + log.txt + seed3/ + log.txt + +Run the following command from the root directory: + +$ python tools/parse_test_res.py output/my_experiment + +Add --ci95 to the argument if you wanna get 95% confidence +interval instead of standard deviation: + +$ python tools/parse_test_res.py output/my_experiment --ci95 + +If my_experiment/ has the following structure, + +my_experiment/ + exp-1/ + seed1/ + log.txt + ... + seed2/ + log.txt + ... + seed3/ + log.txt + ... + exp-2/ + ... + exp-3/ + ... 
+ +Run + +$ python tools/parse_test_res.py output/my_experiment --multi-exp +""" +import re +import numpy as np +import os.path as osp +import argparse +from collections import OrderedDict, defaultdict + +from dassl.utils import check_isfile, listdir_nohidden + + +def compute_ci95(res): + return 1.96 * np.std(res) / np.sqrt(len(res)) + + +def parse_function(*metrics, directory="", args=None, end_signal=None): + print("===") + print(f"Parsing files in {directory}") + subdirs = listdir_nohidden(directory, sort=True) + + outputs = [] + + for subdir in subdirs: + fpath = osp.join(directory, subdir, "log.txt") + assert check_isfile(fpath) + good_to_go = False + output = OrderedDict() + + with open(fpath, "r") as f: + lines = f.readlines() + + for line in lines: + line = line.strip() + + if line == end_signal: + good_to_go = True + + for metric in metrics: + match = metric["regex"].search(line) + if match and good_to_go: + if "file" not in output: + output["file"] = fpath + num = float(match.group(1)) + name = metric["name"] + output[name] = num + + if output: + outputs.append(output) + + assert len(outputs) > 0, f"Nothing found in {directory}" + + metrics_results = defaultdict(list) + for output in outputs: + msg = "" + for key, value in output.items(): + if isinstance(value, float): + msg += f"{key}: {value:.1f}%. " + else: + msg += f"{key}: {value}. " + if key != "file": + metrics_results[key].append(value) + print(msg) + + output_results = OrderedDict() + for key, values in metrics_results.items(): + avg = np.mean(values) + std = compute_ci95(values) if args.ci95 else np.std(values) + print(f"* average {key}: {avg:.1f}% +- {std:.1f}%") + output_results[key] = avg + print("===") + + return output_results + + +def main(args, end_signal): + metric = { + "name": args.keyword, + "regex": re.compile(fr"\* {args.keyword}: ([\.\deE+-]+)%"), + } + + if args.multi_exp: + final_results = defaultdict(list) + + for directory in listdir_nohidden(args.directory, sort=True): + directory = osp.join(args.directory, directory) + results = parse_function( + metric, directory=directory, args=args, end_signal=end_signal + ) + + for key, value in results.items(): + final_results[key].append(value) + + print("Average performance") + for key, values in final_results.items(): + avg = np.mean(values) + print(f"* {key}: {avg:.1f}%") + + else: + parse_function( + metric, directory=args.directory, args=args, end_signal=end_signal + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("directory", type=str, help="path to directory") + parser.add_argument( + "--ci95", + action="store_true", + help=r"compute 95\% confidence interval" + ) + parser.add_argument( + "--test-log", action="store_true", help="parse test-only logs" + ) + parser.add_argument( + "--multi-exp", action="store_true", help="parse multiple experiments" + ) + parser.add_argument( + "--keyword", + default="accuracy", + type=str, + help="which keyword to extract" + ) + args = parser.parse_args() + + end_signal = "Finish training" # needs to be adapted to the latest + if args.test_log: + end_signal = "=> result" + + main(args, end_signal) diff --git a/Dassl.pytorch/tools/replace_text.py b/Dassl.pytorch/tools/replace_text.py new file mode 100644 index 0000000000000000000000000000000000000000..71761544070594034f93afbb75714ebe5177ffcc --- /dev/null +++ b/Dassl.pytorch/tools/replace_text.py @@ -0,0 +1,69 @@ +""" +Replace text in python files. 
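+
+Example (replace a string in every .py file under a directory):
+    $ python tools/replace_text.py dassl/ old_text new_text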
+""" +import glob +import os.path as osp +import argparse +import fileinput + +EXTENSION = ".py" + + +def is_python_file(filename): + ext = osp.splitext(filename)[1] + return ext == EXTENSION + + +def update_file(filename, text_to_search, replacement_text): + print("Processing {}".format(filename)) + with fileinput.FileInput(filename, inplace=True, backup="") as file: + for line in file: + print(line.replace(text_to_search, replacement_text), end="") + + +def recursive_update(directory, text_to_search, replacement_text): + filenames = glob.glob(osp.join(directory, "*")) + + for filename in filenames: + if osp.isfile(filename): + if not is_python_file(filename): + continue + update_file(filename, text_to_search, replacement_text) + elif osp.isdir(filename): + recursive_update(filename, text_to_search, replacement_text) + else: + raise NotImplementedError + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "file_or_dir", type=str, help="path to file or directory" + ) + parser.add_argument("text_to_search", type=str, help="name to be replaced") + parser.add_argument("replacement_text", type=str, help="new name") + parser.add_argument( + "--ext", type=str, default=".py", help="file extension" + ) + args = parser.parse_args() + + file_or_dir = args.file_or_dir + text_to_search = args.text_to_search + replacement_text = args.replacement_text + extension = args.ext + + global EXTENSION + EXTENSION = extension + + if osp.isfile(file_or_dir): + if not is_python_file(file_or_dir): + return + update_file(file_or_dir, text_to_search, replacement_text) + elif osp.isdir(file_or_dir): + recursive_update(file_or_dir, text_to_search, replacement_text) + else: + raise NotImplementedError + + +if __name__ == "__main__": + main() diff --git a/Dassl.pytorch/tools/train.py b/Dassl.pytorch/tools/train.py new file mode 100644 index 0000000000000000000000000000000000000000..106cd19cbffe2c87fb484c2bce2c6db944f9c722 --- /dev/null +++ b/Dassl.pytorch/tools/train.py @@ -0,0 +1,191 @@ +import argparse +import torch + +from dassl.utils import setup_logger, set_random_seed, collect_env_info +from dassl.config import clean_cfg, get_cfg_default +from dassl.engine import build_trainer + + +def print_args(args, cfg): + print("***************") + print("** Arguments **") + print("***************") + optkeys = list(args.__dict__.keys()) + optkeys.sort() + for key in optkeys: + print("{}: {}".format(key, args.__dict__[key])) + print("************") + print("** Config **") + print("************") + print(cfg) + + +def reset_cfg(cfg, args): + if args.root: + cfg.DATASET.ROOT = args.root + + if args.output_dir: + cfg.OUTPUT_DIR = args.output_dir + + if args.resume: + cfg.RESUME = args.resume + + if args.seed: + cfg.SEED = args.seed + + if args.source_domains: + cfg.DATASET.SOURCE_DOMAINS = args.source_domains + + if args.target_domains: + cfg.DATASET.TARGET_DOMAINS = args.target_domains + + if args.transforms: + cfg.INPUT.TRANSFORMS = args.transforms + + if args.trainer: + cfg.TRAINER.NAME = args.trainer + + if args.backbone: + cfg.MODEL.BACKBONE.NAME = args.backbone + + if args.head: + cfg.MODEL.HEAD.NAME = args.head + + +def extend_cfg(cfg): + """ + Add new config variables. + + E.g. + from yacs.config import CfgNode as CN + cfg.TRAINER.MY_MODEL = CN() + cfg.TRAINER.MY_MODEL.PARAM_A = 1. + cfg.TRAINER.MY_MODEL.PARAM_B = 0.5 + cfg.TRAINER.MY_MODEL.PARAM_C = False + """ + pass + + +def setup_cfg(args): + cfg = get_cfg_default() + extend_cfg(cfg) + + # 1. 
From the dataset config file + if args.dataset_config_file: + cfg.merge_from_file(args.dataset_config_file) + + # 2. From the method config file + if args.config_file: + cfg.merge_from_file(args.config_file) + + # 3. From input arguments + reset_cfg(cfg, args) + + # 4. From optional input arguments + cfg.merge_from_list(args.opts) + + clean_cfg(cfg, args.trainer) + cfg.freeze() + + return cfg + + +def main(args): + cfg = setup_cfg(args) + if cfg.SEED >= 0: + print("Setting fixed seed: {}".format(cfg.SEED)) + set_random_seed(cfg.SEED) + setup_logger(cfg.OUTPUT_DIR) + + if torch.cuda.is_available() and cfg.USE_CUDA: + torch.backends.cudnn.benchmark = True + + print_args(args, cfg) + print("Collecting env info ...") + print("** System info **\n{}\n".format(collect_env_info())) + + trainer = build_trainer(cfg) + + if args.eval_only: + trainer.load_model(args.model_dir, epoch=args.load_epoch) + trainer.test() + return + + if not args.no_train: + trainer.train() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--root", type=str, default="", help="path to dataset") + parser.add_argument( + "--output-dir", type=str, default="", help="output directory" + ) + parser.add_argument( + "--resume", + type=str, + default="", + help="checkpoint directory (from which the training resumes)", + ) + parser.add_argument( + "--seed", + type=int, + default=-1, + help="only positive value enables a fixed seed" + ) + parser.add_argument( + "--source-domains", + type=str, + nargs="+", + help="source domains for DA/DG" + ) + parser.add_argument( + "--target-domains", + type=str, + nargs="+", + help="target domains for DA/DG" + ) + parser.add_argument( + "--transforms", type=str, nargs="+", help="data augmentation methods" + ) + parser.add_argument( + "--config-file", type=str, default="", help="path to config file" + ) + parser.add_argument( + "--dataset-config-file", + type=str, + default="", + help="path to config file for dataset setup", + ) + parser.add_argument( + "--trainer", type=str, default="", help="name of trainer" + ) + parser.add_argument( + "--backbone", type=str, default="", help="name of CNN backbone" + ) + parser.add_argument("--head", type=str, default="", help="name of head") + parser.add_argument( + "--eval-only", action="store_true", help="evaluation only" + ) + parser.add_argument( + "--model-dir", + type=str, + default="", + help="load model from this directory for eval-only mode", + ) + parser.add_argument( + "--load-epoch", + type=int, + help="load model weights at this epoch for evaluation" + ) + parser.add_argument( + "--no-train", action="store_true", help="do not call trainer.train()" + ) + parser.add_argument( + "opts", + default=None, + nargs=argparse.REMAINDER, + help="modify config options using the command-line", + ) + args = parser.parse_args() + main(args) diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7d353f4e431712e6136d01748416189b1f0e40cb --- /dev/null +++ b/README.md @@ -0,0 +1,152 @@ +# Vision-Language **Su**bspace **Pr**ompting (SuPr) + +Official implementation of the paper: **"Vision-Language Subspace Prompting"**. 
+ +--- + +# 📚 Table of Contents +- [Vision-Language **Su**bspace **Pr**ompting (SuPr)](#vision-language-subspace-prompting-supr) +- [📚 Table of Contents](#-table-of-contents) + - [🚀 News](#-news) + - [✨ Introduction](#-introduction) + - [📦 Supported Methods](#-supported-methods) + - [📊 Results](#-results) + - [🎨 Visualization](#-visualization) + - [⚙️ Installation](#️-installation) + - [📂 Data Preparation](#-data-preparation) + - [🏛️ Model Zoo](#️-model-zoo) + - [🏋️ Training](#️-training) + - [📈 Evaluation](#-evaluation) + - [📬 Contact](#-contact) + - [🙏 Acknowledgements](#-acknowledgements) + - [🔖 Citation](#-citation) + +--- + +## 🚀 News +- **(April 27, 2025)** + - Released pre-trained models and evaluation scripts to reproduce SuPr's official benchmark results. + - Released training scripts for [SuPr](configs/trainers/SuPr). + - This repository also supports other prompting methods, including [DePT (CVPR'24)](configs/trainers/PromptSRC), [TCP (CVPR'24)](configs/trainers/TCP), [PromptSRC (ICCV'23)](configs/trainers/PromptSRC), [KgCoOp (CVPR'23)](configs/trainers/KgCoOp), [MaPLe (CVPR'23)](configs/trainers/MaPLe), [CoOp (IJCV'22)](configs/trainers/CoOp), and [Co-CoOp (CVPR'22)](configs/trainers/CoCoOp). + +--- + +## ✨ Introduction + +![Main Insight](docs/insight.jpg) + +In adapting vision-language models like CLIP to downstream tasks, existing methods often struggle to balance task-specific objectives with the need to preserve CLIP’s generalizable embedding space. Traditional regularization techniques constrain optimization flexibility, limiting the adaptability of soft prompts to new tasks (left figure). +In contrast, our **Subspace Prompting (SuPr)** method circumvents this tradeoff. It enables the integration of high-dimensional, semantically rich subspaces that simultaneously capture task-specific knowledge while retaining CLIP's generalizable features (right figure). + +--- + +> **Abstract:** +> Prompting vision-language models (e.g., CLIP) to adapt to downstream tasks has emerged as a crucial research topic. A prominent approach is context optimization, which replaces a subset of text tokens with learnable parameters, known as soft prompts. However, conventional pipelines leverage only a single vector embedding derived from these soft prompts for visual classification. +> This design risks overfitting to base class training data and leads to degraded performance on novel classes. Previous works attempt to address this by regularizing soft prompts toward handcrafted hard prompts. Yet, excessive regularization hampers model adaptability on base classes. +> +> To strike a better balance, we introduce **SuPr**, a subspace-based prompting method. SuPr models a shared subspace between learnable soft prompts and textual hard prompts, enabling flexible yet structured adaptation. This approach achieves superior performance on both base and novel classes. +> +> With the advantages of subspace modeling, SuPr demonstrates strong effectiveness across diverse scenarios, including domain generalization, domain adaptation, cross-dataset transfer, and few-shot learning. Moreover, we provide extensive analysis by visualizing the learned subspace and applying SuPr to text-to-image generation tasks to understand the nature of the learned prompts. 
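+
+The snippet below is a minimal, illustrative sketch of the subspace idea described above, not the official SuPr implementation (see [configs/trainers/SuPr](configs/trainers/SuPr) and the training scripts for that). All function names, shapes, and hyperparameters here are hypothetical; it only shows how per-class text embeddings obtained from several prompts could span a subspace, with an image scored by the norm of its projection onto each class subspace:
+
+```python
+import torch
+import torch.nn.functional as F
+
+
+def class_subspace_basis(text_feats: torch.Tensor, k: int) -> torch.Tensor:
+    """text_feats: (num_prompts, dim) L2-normalized text embeddings of one class.
+    Returns an orthonormal basis (dim, k) of the top-k prompt subspace."""
+    # Right singular vectors of the prompt matrix span the class subspace.
+    _, _, vh = torch.linalg.svd(text_feats, full_matrices=False)
+    return vh[:k].T
+
+
+def subspace_logits(image_feats: torch.Tensor, bases: list, scale: float = 100.0) -> torch.Tensor:
+    """Score each class by the norm of the projection of the (normalized)
+    image features onto that class's subspace; larger projection = closer."""
+    # Each B has orthonormal columns, so the coordinate norm equals the projection norm.
+    scores = [(image_feats @ B).norm(dim=-1) for B in bases]
+    return scale * torch.stack(scores, dim=-1)  # (batch, num_classes)
+
+
+# Toy usage with random vectors standing in for CLIP features.
+dim, n_prompts, n_classes, k = 512, 8, 10, 4
+bases = [class_subspace_basis(F.normalize(torch.randn(n_prompts, dim), dim=-1), k)
+         for _ in range(n_classes)]
+images = F.normalize(torch.randn(2, dim), dim=-1)
+print(subspace_logits(images, bases).shape)  # torch.Size([2, 10])
+```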
+ +--- + +## 📦 Supported Methods + +| Method | Paper/Reference | Configurations | Training Scripts | +|----------------------------|-------------------------------------------------|----------------|------------------| +| Independent V-L Prompting | - | [link](configs/trainers/IVLP/) | [link](scripts/independent-vlp) | +| CoOp | [IJCV 2022](https://arxiv.org/abs/2109.01134) | [link](configs/trainers/CoOp) | [link](scripts/coop) | +| Co-CoOp | [CVPR 2022](https://arxiv.org/abs/2203.05557) | [link](configs/trainers/CoCoOp) | [link](scripts/cocoop) | +| MaPLe | [CVPR 2023](https://arxiv.org/abs/2210.03117) | [link](configs/trainers/MaPLe) | [link](scripts/maple) | +| KgCoOp | [CVPR 2023](https://openaccess.thecvf.com/content/CVPR2023/html/Yao_Visual-Language_Prompt_Tuning_With_Knowledge-Guided_Context_Optimization_CVPR_2023_paper.html) | [link](configs/trainers/KgCoOp/) | [link](scripts/kgcoop) | +| PromptSRC | [ICCV 2023](https://arxiv.org/abs/2307.06948) | [link](configs/trainers/PromptSRC/) | [link](scripts/promptsrc) | +| TCP | [CVPR 2024](https://openaccess.thecvf.com/content/CVPR2024/html/Yao_TCPTextual-based_Class-aware_Prompt_tuning_for_Visual-Language_Model_CVPR_2024_paper.html) | [link](configs/trainers/TCP/) | [link](scripts/tcp) | +| DePT | [CVPR 2024](https://openaccess.thecvf.com/content/CVPR2024/html/Zhang_DePT_Decoupled_Prompt_Tuning_CVPR_2024_paper.html) | [link](configs/trainers/PromptSRC/) | [link](scripts/dept) | +| SuPr (ours) | [arXiv](https://arxiv.org/abs/2307.06948) | [link](configs/trainers/SuPr/) | [link](scripts/supr) | + +--- + +## 📊 Results + +| Model | Base Accuracy | Novel Accuracy | Harmonic Mean (HM) | +|----------------------------|:-------------:|:--------------:|:-----------------:| +| CLIP | 69.34 | 74.22 | 71.70 | +| Independent V-L Prompting | 84.14 | 71.42 | 77.26 | +| **SuPr (Ours)** | **84.15** | **76.48** | **80.13** | + +--- + +## 🎨 Visualization + +SuPr's subspace modeling captures diverse intra-class variations, including fine-grained features like color, texture, and depiction style. This enables richer semantic representations compared to traditional soft prompts, which often focus only on dominant concepts. Additionally, interpolations within the subspace reveal smooth semantic transitions along various attributes. + +![Subspace Visualization](docs/vis.jpg) + +![Subspace Walking](docs/walking.jpg){width="330"} + +--- + +## ⚙️ Installation + +Please follow the instructions in [INSTALL.md](docs/INSTALL.md) for environment setup and package requirements. + +--- + +## 📂 Data Preparation + +Datasets required for training and evaluation can be prepared by following [DATASETS.md](docs/DATASETS.md). 
+ +--- + +## 🏛️ Model Zoo + +| Configurations | Model Checkpoints | +|----------------|:-----------------:| +| [SuPr](configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml) | [link](https://mbzuaiac-my.sharepoint.com/:f:/g/personal/syed_wasim_mbzuai_ac_ae/EqFXPs2Zl9pKp39w3SqlR7QBDACTv-AgCXH6_cGflrUFwg?e=l33EBA) | +| [SuPr + PromptSRC](configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml) | [link](https://mbzuaiac-my.sharepoint.com/:f:/g/personal/syed_wasim_mbzuai_ac_ae/EqFXPs2Zl9pKp39w3SqlR7QBDACTv-AgCXH6_cGflrUFwg?e=l33EBA) | +| [SuPr Ens](configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml) | [link](https://mbzuaiac-my.sharepoint.com/:f:/g/personal/syed_wasim_mbzuai_ac_ae/EqFXPs2Zl9pKp39w3SqlR7QBDACTv-AgCXH6_cGflrUFwg?e=l33EBA) | + +--- + +## 🏋️ Training + +Please refer to [TRAIN.md](docs/TRAIN.md) for detailed instructions on training SuPr, PromptSRC, and IVLP baselines from scratch. + +--- + +## 📈 Evaluation + +Please refer to [EVAL.md](docs/EVAL.md) for reproducing official results using our pre-trained models. + +--- + +## 📬 Contact + +For questions, issues, or discussions, please open an issue in this repository or contact: **tongyujun@bupt.edu.cn** + +--- + +## 🙏 Acknowledgements + +Our codebase builds upon and extends the following repositories: +- [PromptSRC](https://github.com/muzairkhattak/PromptSRC) +- [MaPLe](https://github.com/muzairkhattak/multimodal-prompt-learning) +- [CoOp and Co-CoOp](https://github.com/KaiyangZhou/CoOp) + +We sincerely thank the authors for sharing their codebases. If you find our work useful, please also consider citing these related works. + +--- + +## 🔖 Citation + +If you find our work useful, please consider citing: + +```bibtex +@misc{supr2025, + title={Vision-Language Subspace Prompting}, + author={Your Name and Collaborators}, + year={2025}, + eprint={2307.06948}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} \ No newline at end of file diff --git a/clip/__init__.py b/clip/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dcc5619538c0f7c782508bdbd9587259d805e0d9 --- /dev/null +++ b/clip/__init__.py @@ -0,0 +1 @@ +from .clip import * diff --git a/clip/__pycache__/__init__.cpython-38.pyc b/clip/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29f750c303c477d78a9a33743db8768065120d5a Binary files /dev/null and b/clip/__pycache__/__init__.cpython-38.pyc differ diff --git a/clip/__pycache__/clip.cpython-38.pyc b/clip/__pycache__/clip.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d7c6bc357ff7dd75c2e9ea0bdd3051cc80682a9 Binary files /dev/null and b/clip/__pycache__/clip.cpython-38.pyc differ diff --git a/clip/__pycache__/model.cpython-38.pyc b/clip/__pycache__/model.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ce21776b030588992c2c959fe1980f8e7e2d66e9 Binary files /dev/null and b/clip/__pycache__/model.cpython-38.pyc differ diff --git a/clip/__pycache__/simple_tokenizer.cpython-38.pyc b/clip/__pycache__/simple_tokenizer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4266c3759de35dafb7a52e8cce29c4b469d84b66 Binary files /dev/null and b/clip/__pycache__/simple_tokenizer.cpython-38.pyc differ diff --git a/clip/bpe_simple_vocab_16e6.txt.gz b/clip/bpe_simple_vocab_16e6.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..36a15856e00a06a9fbed8cdd34d2393fea4a3113 --- /dev/null +++ b/clip/bpe_simple_vocab_16e6.txt.gz @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:924691ac288e54409236115652ad4aa250f48203de50a9e4722a6ecd48d6804a +size 1356917 diff --git a/clip/clip.py b/clip/clip.py new file mode 100644 index 0000000000000000000000000000000000000000..7a7a97f404d95d66367f5c0fcb6beceb0fd1d917 --- /dev/null +++ b/clip/clip.py @@ -0,0 +1,222 @@ +import hashlib +import os +import urllib +import warnings +from typing import Union, List + +import torch +from PIL import Image +from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize +from tqdm import tqdm + +from .model import build_model +from .simple_tokenizer import SimpleTokenizer as _Tokenizer + +try: + from torchvision.transforms import InterpolationMode + BICUBIC = InterpolationMode.BICUBIC +except ImportError: + BICUBIC = Image.BICUBIC + + +if torch.__version__.split(".") < ["1", "7", "1"]: + warnings.warn("PyTorch version 1.7.1 or higher is recommended") + +__all__ = ["available_models", "load", "tokenize"] +_tokenizer = _Tokenizer() + +_MODELS = { + "RN50": "https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt", + "RN101": "https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt", + "RN50x4": "https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt", + "RN50x16": "https://openaipublic.azureedge.net/clip/models/52378b407f34354e150460fe41077663dd5b39c54cd0bfd2b27167a4a06ec9aa/RN50x16.pt", + "ViT-B/32": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + "ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt", + "ViT-L/14": "https://openaipublic.azureedge.net/clip/models/b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836/ViT-L-14.pt", + "ViT-L/14@336px": "https://openaipublic.azureedge.net/clip/models/3035c92b350959924f9f00213499208652fc7ea050643e8b385c2dac08641f02/ViT-L-14-336px.pt", +} + + +def _download(url: str, root: str = os.path.expanduser("~/.cache/clip")): + os.makedirs(root, exist_ok=True) + filename = os.path.basename(url) + + expected_sha256 = url.split("/")[-2] + download_target = os.path.join(root, filename) + + if os.path.exists(download_target) and not os.path.isfile(download_target): + raise RuntimeError(f"{download_target} exists and is not a regular file") + + if os.path.isfile(download_target): + if hashlib.sha256(open(download_target, "rb").read()).hexdigest() == expected_sha256: + return download_target + else: + warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file") + + with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: + with tqdm(total=int(source.info().get("Content-Length")), ncols=80, unit='iB', unit_scale=True) as loop: + while True: + buffer = source.read(8192) + if not buffer: + break + + output.write(buffer) + loop.update(len(buffer)) + + if hashlib.sha256(open(download_target, "rb").read()).hexdigest() != expected_sha256: + raise RuntimeError(f"Model has been downloaded but the SHA256 checksum does not not match") + + return download_target + + +def _transform(n_px): + return Compose([ + Resize(n_px, interpolation=BICUBIC), + CenterCrop(n_px), + lambda image: image.convert("RGB"), + ToTensor(), + Normalize((0.48145466, 
0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), + ]) + + +def available_models() -> List[str]: + """Returns the names of available CLIP models""" + return list(_MODELS.keys()) + + +def load(name: str, device: Union[str, torch.device] = "cuda" if torch.cuda.is_available() else "cpu", jit=False): + """Load a CLIP model + + Parameters + ---------- + name : str + A model name listed by `clip.available_models()`, or the path to a model checkpoint containing the state_dict + + device : Union[str, torch.device] + The device to put the loaded model + + jit : bool + Whether to load the optimized JIT model or more hackable non-JIT model (default). + + Returns + ------- + model : torch.nn.Module + The CLIP model + + preprocess : Callable[[PIL.Image], torch.Tensor] + A torchvision transform that converts a PIL image into a tensor that the returned model can take as its input + """ + if name in _MODELS: + model_path = _download(_MODELS[name]) + elif os.path.isfile(name): + model_path = name + else: + raise RuntimeError(f"Model {name} not found; available models = {available_models()}") + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location=device if jit else "cpu").eval() + state_dict = None + except RuntimeError: + # loading saved state dict + if jit: + warnings.warn(f"File {model_path} is not a JIT archive. Loading as a state dict instead") + jit = False + state_dict = torch.load(model_path, map_location="cpu") + + if not jit: + model = build_model(state_dict or model.state_dict()).to(device) + if str(device) == "cpu": + model.float() + return model, _transform(model.visual.input_resolution) + + # patch the device names + device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device(device)), example_inputs=[]) + device_node = [n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n)][-1] + + def patch_device(module): + try: + graphs = [module.graph] if hasattr(module, "graph") else [] + except RuntimeError: + graphs = [] + + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("prim::Constant"): + if "value" in node.attributeNames() and str(node["value"]).startswith("cuda"): + node.copyAttributes(device_node) + + model.apply(patch_device) + patch_device(model.encode_image) + patch_device(model.encode_text) + + # patch dtype to float32 on CPU + if str(device) == "cpu": + float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) + float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] + float_node = float_input.node() + + def patch_float(module): + try: + graphs = [module.graph] if hasattr(module, "graph") else [] + except RuntimeError: + graphs = [] + + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("aten::to"): + inputs = list(node.inputs()) + for i in [1, 2]: # dtype can be the second or third argument to aten::to() + if inputs[i].node()["value"] == 5: + inputs[i].node().copyAttributes(float_node) + + model.apply(patch_float) + patch_float(model.encode_image) + patch_float(model.encode_text) + + model.float() + + return model, _transform(model.input_resolution.item()) + + +def tokenize(texts: Union[str, List[str]], context_length: int = 77, truncate: bool = False) -> torch.LongTensor: + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input 
string or a list of input strings to tokenize + + context_length : int + The context length to use; all CLIP models use 77 as the context length + + truncate: bool + Whether to truncate the text in case its encoding is longer than the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + if isinstance(texts, str): + texts = [texts] + + sot_token = _tokenizer.encoder["<|startoftext|>"] + eot_token = _tokenizer.encoder["<|endoftext|>"] + all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in texts] + result = torch.zeros(len(all_tokens), context_length, dtype=torch.long) + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + if truncate: + tokens = tokens[:context_length] + tokens[-1] = eot_token + else: + raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") + result[i, :len(tokens)] = torch.tensor(tokens) + + return result diff --git a/clip/model.py b/clip/model.py new file mode 100644 index 0000000000000000000000000000000000000000..29c59305284956136ff2c67420e4bafe4b9dc599 --- /dev/null +++ b/clip/model.py @@ -0,0 +1,908 @@ +from collections import OrderedDict +from typing import Tuple, Union + +import numpy as np +import torch +import torch.nn.functional as F +from torch import nn + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2d(inplanes, planes, 1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + + self.conv2 = nn.Conv2d(planes, planes, 3, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + + self.avgpool = nn.AvgPool2d(stride) if stride > 1 else nn.Identity() + + self.conv3 = nn.Conv2d(planes, planes * self.expansion, 1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + + self.relu = nn.ReLU(inplace=True) + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + # downsampling layer is prepended with an avgpool, and the subsequent convolution has stride 1 + self.downsample = nn.Sequential(OrderedDict([ + ("-1", nn.AvgPool2d(stride)), + ("0", nn.Conv2d(inplanes, planes * self.expansion, 1, stride=1, bias=False)), + ("1", nn.BatchNorm2d(planes * self.expansion)) + ])) + + def forward(self, x: torch.Tensor): + identity = x + + out = self.relu(self.bn1(self.conv1(x))) + out = self.relu(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + return out + + +class AttentionPool2d(nn.Module): + def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None): + super().__init__() + self.positional_embedding = nn.Parameter(torch.randn(spacial_dim ** 2 + 1, embed_dim) / embed_dim ** 0.5) + self.k_proj = nn.Linear(embed_dim, embed_dim) + self.q_proj = nn.Linear(embed_dim, embed_dim) + self.v_proj = nn.Linear(embed_dim, embed_dim) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim) + self.num_heads = num_heads + + def forward(self, x): + x = x.reshape(x.shape[0], x.shape[1], x.shape[2] * x.shape[3]).permute(2, 0, 1) # NCHW -> (HW)NC + x = torch.cat([x.mean(dim=0, keepdim=True), x], dim=0) # (HW+1)NC + x = x + self.positional_embedding[:, None, 
:].to(x.dtype) # (HW+1)NC + x, _ = F.multi_head_attention_forward( + query=x, key=x, value=x, + embed_dim_to_check=x.shape[-1], + num_heads=self.num_heads, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + in_proj_weight=None, + in_proj_bias=torch.cat([self.q_proj.bias, self.k_proj.bias, self.v_proj.bias]), + bias_k=None, + bias_v=None, + add_zero_attn=False, + dropout_p=0, + out_proj_weight=self.c_proj.weight, + out_proj_bias=self.c_proj.bias, + use_separate_proj_weight=True, + training=self.training, + need_weights=False + ) + + return x[0] + + +class ModifiedResNet(nn.Module): + """ + A ResNet class that is similar to torchvision's but contains the following changes: + - There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool. + - Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 + - The final pooling layer is a QKV attention instead of an average pool + """ + + def __init__(self, layers, output_dim, heads, input_resolution=224, width=64): + super().__init__() + self.output_dim = output_dim + self.input_resolution = input_resolution + + # the 3-layer stem + self.conv1 = nn.Conv2d(3, width // 2, kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(width // 2) + self.conv2 = nn.Conv2d(width // 2, width // 2, kernel_size=3, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(width // 2) + self.conv3 = nn.Conv2d(width // 2, width, kernel_size=3, padding=1, bias=False) + self.bn3 = nn.BatchNorm2d(width) + self.avgpool = nn.AvgPool2d(2) + self.relu = nn.ReLU(inplace=True) + + # residual layers + self._inplanes = width # this is a *mutable* variable used during construction + self.layer1 = self._make_layer(width, layers[0]) + self.layer2 = self._make_layer(width * 2, layers[1], stride=2) + self.layer3 = self._make_layer(width * 4, layers[2], stride=2) + self.layer4 = self._make_layer(width * 8, layers[3], stride=2) + + embed_dim = width * 32 # the ResNet feature dimension + self.attnpool = AttentionPool2d(input_resolution // 32, embed_dim, heads, output_dim) + + def _make_layer(self, planes, blocks, stride=1): + layers = [Bottleneck(self._inplanes, planes, stride)] + + self._inplanes = planes * Bottleneck.expansion + for _ in range(1, blocks): + layers.append(Bottleneck(self._inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + def stem(x): + for conv, bn in [(self.conv1, self.bn1), (self.conv2, self.bn2), (self.conv3, self.bn3)]: + x = self.relu(bn(conv(x))) + x = self.avgpool(x) + return x + + x = x.type(self.conv1.weight.dtype) + x = stem(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.attnpool(x) + + return x + + +class LayerNorm(nn.LayerNorm): + """Subclass torch's LayerNorm to handle fp16.""" + + def forward(self, x: torch.Tensor): + orig_type = x.dtype + ret = super().forward(x.type(torch.float32)) + return ret.type(orig_type) + + +class QuickGELU(nn.Module): + def forward(self, x: torch.Tensor): + return x * torch.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Module): + def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None): + super().__init__() + + self.attn = nn.MultiheadAttention(d_model, n_head) + self.ln_1 = LayerNorm(d_model) + self.mlp = nn.Sequential(OrderedDict([ + ("c_fc", nn.Linear(d_model, d_model * 4)), + ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model)) + ])) + 
self.ln_2 = LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x: torch.Tensor): + self.attn_mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is not None else None + return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0] + + def forward(self, x: torch.Tensor): + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x + + +class ResidualAttentionBlock_IVLP(nn.Module): + def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None, add_prompt=False, + text_layer=False, i=0, design_details=None): + super().__init__() + + self.attn = nn.MultiheadAttention(d_model, n_head) + self.ln_1 = LayerNorm(d_model) + self.mlp = nn.Sequential(OrderedDict([ + ("c_fc", nn.Linear(d_model, d_model * 4)), + ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model)) + ])) + self.ln_2 = LayerNorm(d_model) + # Only add learnable tokens if flag is set True + # For the first iteration i, we should not add the learnable parameters + # as it is already been taken care of in the very start, for both text + # and the visual branch + self.text_layer = text_layer + self.attn_mask = attn_mask + if i != 0: + self.add_prompt = add_prompt + if self.add_prompt: + if self.text_layer: + self.n_ctx_text = design_details["language_ctx"] # hyperparameter + ctx_vectors = torch.empty(self.n_ctx_text, d_model) + else: + self.n_ctx_visual = design_details["vision_ctx"] # hyperparameter + ctx_vectors = torch.empty(self.n_ctx_visual, d_model) + # Code snippet for per layer visual prompts + nn.init.normal_(ctx_vectors, std=0.02) + self.VPT_shallow = nn.Parameter(ctx_vectors) + else: + self.add_prompt = False + + def attention(self, x: torch.Tensor): + self.attn_mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is not None else None + return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0] + + def forward(self, x: torch.Tensor): + # Will need to append the learnable tokens for this layer here + # Check if flag was set for this layer or not + if self.add_prompt: + # Also see if this is textual transformer layer or not + if not self.text_layer: + # Remove the outputs produced by learnable tokens of previous layer + prefix = x[0:x.shape[0] - self.n_ctx_visual, :, :] + # Create/configure learnable tokens of this layer + visual_context = self.VPT_shallow.expand(x.shape[1], -1, -1).permute(1, 0, 2).half() + # Add the learnable tokens of this layer with the input, by replacing the previous + # layer learnable tokens + x = torch.cat([prefix, visual_context], dim=0) + else: + # Appending the learnable tokens in different way + # x -> [77, NCLS, DIM] + # First remove the learnable tokens from previous layer + prefix = x[:1, :, :] + suffix = x[1 + self.n_ctx_text:, :, :] + # Create/configure learnable tokens of this layer + textual_context = self.VPT_shallow.expand(x.shape[1], -1, -1).permute(1, 0, 2).half() + # Add the learnable tokens of this layer with the input, replaced by previous + # layer learnable tokens + x = torch.cat([prefix, textual_context, suffix], dim=0) + + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x + + +class ResidualAttentionBlock_MaPLe(nn.Module): + def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None, design_details=None, + text_layer=False, i=0): + super().__init__() + + self.attn = nn.MultiheadAttention(d_model, n_head) + self.ln_1 = LayerNorm(d_model) + self.mlp = nn.Sequential(OrderedDict([ + ("c_fc", 
nn.Linear(d_model, d_model * 4)), + ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model)) + ])) + self.ln_2 = LayerNorm(d_model) + # For the first iteration i, we do not need to add the learnable parameters here + # as it will be added in the beginning, for both text and the vision branch + self.text_layer = text_layer + self.attn_mask = attn_mask + # This must be consistent with the config file prompt + self.compound_prompt_nctx = design_details['maple_length'] + if i == 0: + self.first_layer = True + else: + self.first_layer = False + + def attention(self, x: torch.Tensor): + self.attn_mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is not None else None + return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0] + + def forward(self, inputs): + # For the first layer, we do not need to add any duplicate, as it is already added + # as the shallow version + x = inputs[0] + compound_prompts_deeper = inputs[1] + counter = inputs[2] + if not self.first_layer: + if len(compound_prompts_deeper) > 0: + # This means that deeper compound prompts are turned on + # Here it behaves differently for text and visual side + # Forward function is same for both + + if not self.text_layer: + # First check if the ith layer needs compound prompts or not + if not (counter > len(compound_prompts_deeper) - 1): + # Remove the outputs produced by learnable tokens of previous layer + prefix = x[0:x.shape[0] - self.compound_prompt_nctx, :, :] + # Create/configure learnable tokens of this layer + visual_context = compound_prompts_deeper[counter] # extract the correct index + visual_context = visual_context.expand(x.shape[1], -1, -1).permute(1, 0, 2).half() + # Add the learnable tokens of this layer with the input, by replacing previous + # layer learnable tokens + x = torch.cat([prefix, visual_context], dim=0) + + # Once done, update the counter, so that the next time, it does not use same learnable tokens + counter += 1 + else: + # First check if the ith layer needs compound prompts or not + if not (counter > len(compound_prompts_deeper) - 1): + # Appending the learnable tokens in different way + # x -> [77, NCLS, DIM] + # First remove the learnable tokens from previous layer + prefix = x[:1, :, :] + suffix = x[1 + self.compound_prompt_nctx:, :, :] + # Create/configure learnable tokens of this layer + textual_context = compound_prompts_deeper[counter] + textual_context = textual_context.expand(x.shape[1], -1, -1).permute(1, 0, 2).half() + # Add the learnable tokens of this layer with the input, replaced by previous + # layer learnable tokens + x = torch.cat([prefix, textual_context, suffix], dim=0) + # Once done, update the counter, so that the next time, it does not use same learnable tokens + counter += 1 + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return [x, compound_prompts_deeper, counter] # return again as a list, so that nn.seq can work + +class ResidualAttentionBlock_TCP(nn.Module): + def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None): + super().__init__() + + self.attn = nn.MultiheadAttention(d_model, n_head) + self.ln_1 = LayerNorm(d_model) + self.mlp = nn.Sequential(OrderedDict([ + ("c_fc", nn.Linear(d_model, d_model * 4)), + ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model)) + ])) + self.ln_2 = LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x: torch.Tensor): + self.attn_mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is 
not None else None + return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0] + + def forward(self, inputs): + if len(inputs)==4: + x = inputs[0] + compound_prompts_deeper = inputs[1] + weight = inputs[2] + counter = inputs[3] + layers=[8] + weights=[1.0] + if counter in layers: + ind = layers.index(counter) + textual_context = compound_prompts_deeper.permute(1,0,2) + n_ctx=textual_context.shape[0] + prefix = x[:1, :, :] + suffix = x[1+n_ctx:, :, :] + midfix = x[1:1+n_ctx,:,:] + weight = weights[ind] + x = torch.cat([prefix,weight*textual_context+(1-weight)*midfix,suffix], dim=0) + counter += 1 + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return [x, compound_prompts_deeper, weight,counter] # return again as a list, so that nn.seq can work + else: + x = inputs + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x + + +class ResidualAttentionBlock_SuPr(nn.Module): + def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None, add_prompt=False, + text_layer=False, i=0, design_details=None): + super().__init__() + + self.attn = nn.MultiheadAttention(d_model, n_head) + self.ln_1 = LayerNorm(d_model) + self.mlp = nn.Sequential(OrderedDict([ + ("c_fc", nn.Linear(d_model, d_model * 4)), + ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model)) + ])) + self.ln_2 = LayerNorm(d_model) + # Only add learnable tokens if flag is set True + # For the first iteration i, we should not add the learnable parameters + # as it is already been taken care of in the very start, for both text + # and the visual branch + self.text_layer = text_layer + self.attn_mask = attn_mask + self.space_dim = design_details["space_dim"] + self.max_name_len = design_details["max_name_len"] + self.pad1 = None + self.pad2 = None + + + if i != 0: + self.add_prompt = add_prompt + if self.add_prompt: + if self.text_layer: + self.n_ctx_text = design_details["language_ctx"] # hyperparameter + ctx_vectors = torch.empty(self.n_ctx_text, d_model) + for i in range (self.space_dim): + self.register_parameter("VPT_scale{}".format(i), nn.Parameter(torch.zeros(self.n_ctx_text,d_model))) + single_para = nn.Parameter(torch.zeros(self.n_ctx_text,d_model)) + nn.init.normal_(single_para, std=0.02) + self.register_parameter("VPT_bias{}".format(i), single_para) + else: + self.n_ctx_visual = design_details["vision_ctx"] # hyperparameter + ctx_vectors = torch.empty(self.n_ctx_visual, d_model) + # Code snippet for per layer visual prompts + nn.init.normal_(ctx_vectors, std=0.02) + self.VPT_shallow = nn.Parameter(ctx_vectors) + else: + self.add_prompt = False + + if self.text_layer: + self.n_ctx_text = design_details["language_ctx"] + + + def attention(self, x: torch.Tensor): + self.attn_mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is not None else None + return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0] + + def forward(self, x: torch.Tensor): + # Will need to append the learnable tokens for this layer here + + if self.add_prompt: + # Also see if this is textual transformer layer or not + if not self.text_layer: + # Remove the outputs produced by learnable tokens of previous layer + prefix = x[0:x.shape[0] - self.n_ctx_visual, :, :] + # Create/configure learnable tokens of this layer + visual_context = self.VPT_shallow.expand(x.shape[1], -1, -1).permute(1, 0, 2).half() + # Add the learnable tokens of this layer with the input, by replacing the previous + # layer learnable tokens + x = 
torch.cat([prefix, visual_context], dim=0) + else: + VPT = [self.VPT_shallow.half()] #vector prompt + for i in range(self.space_dim): + # for DP not use parameterlist + scale = getattr(self, f"VPT_scale{i}").half() + bias = getattr(self, f"VPT_bias{i}").half() + ctx = scale * VPT[0] + ctx = ctx + bias + VPT.append(ctx) + VPT = torch.stack(VPT) #space_dim + 1, n_ctx, 512 + + prefix = x[ :, :, :1, :] + suffix = x[ :, :, 1 + self.n_ctx_text:, :] + # Create/configure learnable tokens of this layer + ctx = VPT.expand(x.shape[0], -1, -1, -1).half()# N, space_dim + 1, L, D + # Add the learnable tokens of this layer with the input, replaced by previous + # layer learnable tokens + x = torch.cat([prefix, ctx, suffix], dim=2) + + if self.text_layer:# concat vector/space prompts within one sequence + #(n_cls, space_dim + 1, 77, dim) => (N*, 77, dim) + n_cls, _, _, dim, = x.size() + len_one_sentence = 1 + self.n_ctx_text + self.max_name_len #BOS + Prompt + {Name_Len+.+EOS} + #truncate the input/ remove padding + x_truncate = x[:,:,:len_one_sentence,:] # n_cls, space_dim + 1, len_one_sentence, dim + # calculate sentence per sequence + max_n = 77 // len_one_sentence + x_truncate = x_truncate.flatten(0,1) # (space_dim+1)*n_cls, len_one_sentence, dim + + # (space_dim+1)*n_cls, len_one_sentence, dim => N*, 77, dim + # if the sentence number is not divisible by max_n, pad more sentence + last_drop = 0 + if (x_truncate.shape[0] % max_n): + last_drop = max_n - (x_truncate.shape[0] % max_n) # pad + if self.pad1 is None: + self.pad1 = torch.zeros_like(x_truncate[-last_drop:]).to(dtype=x.dtype, device=x.device) + pad = self.pad1 + x_truncate = torch.cat([x_truncate, pad]) # pad 0 + + # concate sentence in one sequence + x_truncate = x_truncate.reshape(-1, max_n * len_one_sentence, dim) #N*/max_n L D + # if sequence length is less than 77, pad to 77 + if (max_n * len_one_sentence)<77: + pad = x[0:1,0,x_truncate.shape[1]:,:].expand(x_truncate.shape[0],-1,-1) + x_truncate = torch.cat([x_truncate, pad],dim=1) + x = x_truncate.permute(1, 0, 2) + + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + + # reshape to the n_cls, space_dim + 1, 77, dim + if self.text_layer: + x = x.permute(1, 0, 2) # N*/max_n L D + x_truncate = x[:, : max_n * len_one_sentence, :] + x_truncate = x_truncate.reshape( -1, len_one_sentence, dim) # N*, len_one_sentence, dim + if last_drop:# drop the sentence padding, i.e. 
pad1 + x_truncate = x_truncate[:-last_drop] + x_truncate = x_truncate.reshape( n_cls, (self.space_dim + 1), len_one_sentence, dim)# n_cls, space_dim+1, L, D + + # recover to the n_cls, space_dim + 1, 77, dim + if self.pad2 is None: + self.pad2 = torch.zeros([n_cls, (self.space_dim + 1), 77-len_one_sentence,dim]).to(dtype=x.dtype, device=x.device) + pad = self.pad2 + x = torch.cat([x_truncate, pad],dim=2) + return x + + + +class Transformer(nn.Module): + def __init__(self, width: int, layers: int, heads: int, attn_mask: torch.Tensor = None, prompts_needed=0, + text_layer=False, design_details=None): + super().__init__() + self.width = width + self.layers = layers + # Implements respective encoder blocks for a given design choice + current_trainer = design_details['trainer'] + if current_trainer == 'IVLP' or current_trainer == 'VPT': + self.resblocks = nn.Sequential(*[ResidualAttentionBlock_IVLP(width, heads, attn_mask, True, text_layer, i, design_details) if prompts_needed > i + else ResidualAttentionBlock_IVLP(width, heads, attn_mask, False, text_layer, i, design_details) + for i in range(layers)]) + elif current_trainer == 'MaPLe': + self.resblocks = nn.Sequential( + *[ResidualAttentionBlock_MaPLe(width, heads, attn_mask, design_details, text_layer, i) + for i in range(layers)]) + + elif current_trainer == 'TCP': + self.resblocks = nn.Sequential( + *[ResidualAttentionBlock_TCP(width, heads, attn_mask) + for _ in range(layers)]) + + elif current_trainer == 'SuPr': + self.resblocks = nn.Sequential(*[ResidualAttentionBlock_SuPr(width, heads, attn_mask, True, text_layer, i, design_details) if prompts_needed > i + else ResidualAttentionBlock_SuPr(width, heads, attn_mask, False, text_layer, i, design_details) + for i in range(layers)]) + + else: + # Corresponds to default CoOp or CoCoOp + assert current_trainer == 'CoOp' or current_trainer == 'CoCoOp' + self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]) + + def forward(self, x: torch.Tensor): + return self.resblocks(x) + + +class VisionTransformer(nn.Module): + def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, + output_dim: int, design_details): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + self.conv1 = nn.Conv2d(in_channels=3, out_channels=width, kernel_size=patch_size, stride=patch_size, bias=False) + current_trainer = design_details['trainer'] + if design_details["vision_depth"] == 0 or current_trainer == 'CoOp' or current_trainer == 'CoCoOp': + self.VPT_shallow = False + else: + self.VPT_shallow = True + if self.VPT_shallow: + # Add visual prompt tokens here + n_ctx = design_details["vision_ctx"] # hyperparameter + ctx_vectors = torch.empty(n_ctx, width) + nn.init.normal_(ctx_vectors, std=0.02) + self.VPT = nn.Parameter(ctx_vectors) + # self.VPT.half() + scale = width ** -0.5 + self.class_embedding = nn.Parameter(scale * torch.randn(width)) + self.positional_embedding = nn.Parameter(scale * torch.randn((input_resolution // patch_size) ** 2 + 1, width)) + self.ln_pre = LayerNorm(width) + # hyper-parameter if need to add prompt embeddings inside to the input + # of transformer block or not: + self.prompt_till_layer_visual = design_details["vision_depth"] + self.transformer = Transformer(width, layers, heads, prompts_needed=self.prompt_till_layer_visual, + design_details=design_details) + + self.ln_post = LayerNorm(width) + self.proj = nn.Parameter(scale * torch.randn(width, output_dim)) + + 
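+    # Forward pass summary (descriptive comment): conv1 patchifies the image into a [*, width, grid, grid]
+    # map, which is flattened into a token sequence; the class embedding is prepended and the positional
+    # embedding added. When VPT_shallow is enabled, the learnable visual prompt tokens (self.VPT) are
+    # appended to the sequence before ln_pre. The sequence then goes through the transformer in LND
+    # layout; only the class token is kept, normalized by ln_post, and projected via self.proj.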
def forward(self, x: torch.Tensor): + x = self.conv1(x) # shape = [*, width, grid, grid] + x = x.reshape(x.shape[0], x.shape[1], -1) # shape = [*, width, grid ** 2] + x = x.permute(0, 2, 1) # shape = [*, grid ** 2, width] + x = torch.cat( + [self.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, + device=x.device), + x], dim=1) # shape = [*, grid ** 2 + 1, width] + x = x + self.positional_embedding.to(x.dtype) + + # After positional embeddings, we will attach prompts with the model, remember only those + # are trainable parameters here in whole image encoder. + if self.VPT_shallow: + visual_ctx = self.VPT.expand(x.shape[0], -1, -1).half() + x = torch.cat([x, visual_ctx], dim=1) + + + # Normal code as before + x = self.ln_pre(x) + + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + + x = self.ln_post(x[:, 0, :]) + + if self.proj is not None: + x = x @ self.proj + + return x + + +class VisionTransformer_MaPLe(nn.Module): + def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int, + design_details): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + self.conv1 = nn.Conv2d(in_channels=3, out_channels=width, kernel_size=patch_size, stride=patch_size, bias=False) + self.VPT_shallow = True + scale = width ** -0.5 + self.class_embedding = nn.Parameter(scale * torch.randn(width)) + self.positional_embedding = nn.Parameter(scale * torch.randn((input_resolution // patch_size) ** 2 + 1, width)) + self.ln_pre = LayerNorm(width) + # hyper-parameter if need to add prompt embeddings inside to the input + # of transformer block or not: + self.prompt_till_layer_visual = 0 + self.transformer = Transformer(width, layers, heads, design_details=design_details) + + self.ln_post = LayerNorm(width) + self.proj = nn.Parameter(scale * torch.randn(width, output_dim)) + + def forward(self, x: torch.Tensor, shared_ctx, compound_deeper_prompts): + x = self.conv1(x) # shape = [*, width, grid, grid] + x = x.reshape(x.shape[0], x.shape[1], -1) # shape = [*, width, grid ** 2] + x = x.permute(0, 2, 1) # shape = [*, grid ** 2, width] + x = torch.cat( + [self.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device), + x], dim=1) # shape = [*, grid ** 2 + 1, width] + x = x + self.positional_embedding.to(x.dtype) + + # After positional embeddings, we will attach prompts with the model, remember only those + # are trainable parameters here in whole image encoder. 
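+        # Note: in this MaPLe variant the shallow visual prompt is not a parameter of the image encoder
+        # itself; shared_ctx comes from the prompt learner, is expanded over the batch and appended to the
+        # patch tokens below, while compound_deeper_prompts are passed to the transformer together with a
+        # counter (starting at 0) so each ResidualAttentionBlock_MaPLe can splice in its own layer's prompts.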
+ if self.VPT_shallow: + visual_ctx = shared_ctx.expand(x.shape[0], -1, -1).half() + x = torch.cat([x, visual_ctx], dim=1) + else: + assert self.prompt_till_layer_visual == 0 + + # Normal code as before + x = self.ln_pre(x) + + x = x.permute(1, 0, 2) # NLD -> LND + # Again combine the inputs, so nn.sequential can work + outputs = self.transformer([x, compound_deeper_prompts, 0]) # third argument is counter + x = outputs[0] + x = x.permute(1, 0, 2) # LND -> NLD + + x = self.ln_post(x[:, 0, :]) + + if self.proj is not None: + x = x @ self.proj + + return x + + +class CLIP(nn.Module): + def __init__(self, + embed_dim: int, + # vision + image_resolution: int, + vision_layers: Union[Tuple[int, int, int, int], int], + vision_width: int, + vision_patch_size: int, + # text + context_length: int, + vocab_size: int, + transformer_width: int, + transformer_heads: int, + transformer_layers: int, + design_details + ): + super().__init__() + + self.context_length = context_length + trainer = design_details['trainer'] + + if isinstance(vision_layers, (tuple, list)): + vision_heads = vision_width * 32 // 64 + self.visual = ModifiedResNet( + layers=vision_layers, + output_dim=embed_dim, + heads=vision_heads, + input_resolution=image_resolution, + width=vision_width + ) + else: + vision_heads = vision_width // 64 + if trainer == "MaPLe": + self.visual = VisionTransformer_MaPLe( + input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim, + design_details=design_details + ) + else: + self.visual = VisionTransformer( + input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim, + design_details=design_details + ) + # hyper-parameter if need to add prompt embeddings inside to the input + # of transformer block or not: + prompt_till_layer_text = design_details['language_depth'] + self.transformer = Transformer( + width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask() if "SuPr" not in trainer else self.build_attention_mask_SuPr( 1+design_details["language_ctx"]+design_details["max_name_len"]), + prompts_needed=prompt_till_layer_text, + text_layer=True, + design_details=design_details + ) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = nn.Parameter(torch.empty(self.context_length, transformer_width)) + self.ln_final = LayerNorm(transformer_width) + + self.text_projection = nn.Parameter(torch.empty(transformer_width, embed_dim)) + self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) + + self.initialize_parameters() + + def initialize_parameters(self): + nn.init.normal_(self.token_embedding.weight, std=0.02) + nn.init.normal_(self.positional_embedding, std=0.01) + + if isinstance(self.visual, ModifiedResNet): + if self.visual.attnpool is not None: + std = self.visual.attnpool.c_proj.in_features ** -0.5 + nn.init.normal_(self.visual.attnpool.q_proj.weight, std=std) + nn.init.normal_(self.visual.attnpool.k_proj.weight, std=std) + nn.init.normal_(self.visual.attnpool.v_proj.weight, std=std) + nn.init.normal_(self.visual.attnpool.c_proj.weight, std=std) + + for resnet_block in [self.visual.layer1, self.visual.layer2, self.visual.layer3, self.visual.layer4]: + for name, param in resnet_block.named_parameters(): + if name.endswith("bn3.weight"): + 
nn.init.zeros_(param) + + proj_std = (self.transformer.width ** -0.5) * ((2 * self.transformer.layers) ** -0.5) + attn_std = self.transformer.width ** -0.5 + fc_std = (2 * self.transformer.width) ** -0.5 + for block in self.transformer.resblocks: + nn.init.normal_(block.attn.in_proj_weight, std=attn_std) + nn.init.normal_(block.attn.out_proj.weight, std=proj_std) + nn.init.normal_(block.mlp.c_fc.weight, std=fc_std) + nn.init.normal_(block.mlp.c_proj.weight, std=proj_std) + + if self.text_projection is not None: + nn.init.normal_(self.text_projection, std=self.transformer.width ** -0.5) + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # pytorch uses additive attention mask; fill with -inf + mask = torch.empty(self.context_length, self.context_length) + mask.fill_(float("-inf")) + mask.triu_(1) # zero out the lower diagonal + return mask + + def build_attention_mask_SuPr(self, max_smallgrid): + # Create a mask of size (context_length, context_length) + # Create a mask attention to the lower triangular part, constrain within the single sentence + mask = torch.empty(self.context_length, self.context_length) + mask.fill_(float("-inf")) + + # Number of blocks (segments) to create the lower triangular block + num_blocks = (self.context_length) // max_smallgrid + + for n in range(num_blocks): + # Calculate the start and end indices for the current block + start_idx = n * max_smallgrid + end_idx = (n + 1) * max_smallgrid + + # Place the block into the larger mask at the appropriate location + mask[start_idx:end_idx, start_idx:end_idx].triu_(1) + if (self.context_length) % max_smallgrid != 0: #prevent nan + mask[num_blocks * max_smallgrid:, num_blocks * max_smallgrid] = \ + torch.zeros_like(mask[num_blocks * max_smallgrid:, num_blocks * max_smallgrid]).float() + return mask + + + @property + def dtype(self): + return self.visual.conv1.weight.dtype + + def encode_image(self, image): + return self.visual(image.type(self.dtype)) + + def encode_text(self, text): + x = self.token_embedding(text).type(self.dtype) # [batch_size, n_ctx, d_model] + + x = x + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), text.argmax(dim=-1)] @ self.text_projection + + return x + + def forward(self, image, text): + image_features = self.encode_image(image) + text_features = self.encode_text(text) + + # normalized features + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + # cosine similarity as logits + logit_scale = self.logit_scale.exp() + logits_per_image = logit_scale * image_features @ text_features.t() + logits_per_text = logit_scale * text_features @ image_features.t() + + # shape = [global_batch_size, global_batch_size] + return logits_per_image, logits_per_text + + +def convert_weights(model: nn.Module): + """Convert applicable model parameters to fp16""" + + def _convert_weights_to_fp16(l): + if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Linear)): + l.weight.data = l.weight.data.half() + if l.bias is not None: + l.bias.data = l.bias.data.half() + + if isinstance(l, nn.MultiheadAttention): + for attr in [*[f"{s}_proj_weight" for 
s in ["in", "q", "k", "v"]], "in_proj_bias", "bias_k", "bias_v"]: + tensor = getattr(l, attr) + if tensor is not None: + tensor.data = tensor.data.half() + + for name in ["text_projection", "proj"]: + if hasattr(l, name): + attr = getattr(l, name) + if attr is not None: + attr.data = attr.data.half() + + model.apply(_convert_weights_to_fp16) + + +def build_model(state_dict: dict, design_details): + vit = "visual.proj" in state_dict + print(f'build model vit is {vit}') + + if vit: + vision_width = state_dict["visual.conv1.weight"].shape[0] + vision_layers = len( + [k for k in state_dict.keys() if k.startswith("visual.") and k.endswith(".attn.in_proj_weight")]) + vision_patch_size = state_dict["visual.conv1.weight"].shape[-1] + grid_size = round((state_dict["visual.positional_embedding"].shape[0] - 1) ** 0.5) + image_resolution = vision_patch_size * grid_size + else: + counts: list = [len(set(k.split(".")[2] for k in state_dict if k.startswith(f"visual.layer{b}"))) for b in + [1, 2, 3, 4]] + vision_layers = tuple(counts) + vision_width = state_dict["visual.layer1.0.conv1.weight"].shape[0] + output_width = round((state_dict["visual.attnpool.positional_embedding"].shape[0] - 1) ** 0.5) + vision_patch_size = None + assert output_width ** 2 + 1 == state_dict["visual.attnpool.positional_embedding"].shape[0] + image_resolution = output_width * 32 + + embed_dim = state_dict["text_projection"].shape[1] + context_length = state_dict["positional_embedding"].shape[0] + vocab_size = state_dict["token_embedding.weight"].shape[0] + transformer_width = state_dict["ln_final.weight"].shape[0] + transformer_heads = transformer_width // 64 + transformer_layers = len(set(k.split(".")[2] for k in state_dict if k.startswith(f"transformer.resblocks"))) + + model = CLIP( + embed_dim, + image_resolution, vision_layers, vision_width, vision_patch_size, + context_length, vocab_size, transformer_width, transformer_heads, transformer_layers, design_details + ) + + for key in ["input_resolution", "context_length", "vocab_size"]: + if key in state_dict: + del state_dict[key] + + convert_weights(model) + try: + model.load_state_dict(state_dict) + except: + missing_keys, _ = model.load_state_dict(state_dict, strict=False) + print('Weights not found for some missing keys: ', missing_keys) + return model.eval() diff --git a/clip/simple_tokenizer.py b/clip/simple_tokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..0a66286b7d5019c6e221932a813768038f839c91 --- /dev/null +++ b/clip/simple_tokenizer.py @@ -0,0 +1,132 @@ +import gzip +import html +import os +from functools import lru_cache + +import ftfy +import regex as re + + +@lru_cache() +def default_bpe(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz") + + +@lru_cache() +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. + This is a signficant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + And avoids mapping to whitespace/control characters the bpe code barfs on. 
+ """ + bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1)) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8+n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + Word is represented as tuple of symbols (symbols being variable-length strings). + """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + + +class SimpleTokenizer(object): + def __init__(self, bpe_path: str = default_bpe()): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode("utf-8").split('\n') + merges = merges[1:49152-256-2+1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v+'' for v in vocab] + for merge in merges: + vocab.append(''.join(merge)) + vocab.extend(['<|startoftext|>', '<|endoftext|>']) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} + self.pat = re.compile(r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", re.IGNORECASE) + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + ( token[-1] + '',) + pairs = get_pairs(word) + + if not pairs: + return token+'' + + while True: + bigram = min(pairs, key = lambda pair: self.bpe_ranks.get(pair, float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word)-1 and word[i+1] == second: + new_word.append(first+second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('', ' ') + return text diff --git a/configs/.DS_Store b/configs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3ddae41e560bd6e055a4d2537863d7e55b4547b5 Binary files /dev/null and b/configs/.DS_Store differ diff --git a/configs/datasets/caltech101.yaml b/configs/datasets/caltech101.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d3465f9a6f4153de383c6810c10d8fedef34908 --- /dev/null +++ 
b/configs/datasets/caltech101.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "Caltech101" diff --git a/configs/datasets/dtd.yaml b/configs/datasets/dtd.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e4b05216f78afa6d9b2508ef5a753aa394032f0 --- /dev/null +++ b/configs/datasets/dtd.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "DescribableTextures" diff --git a/configs/datasets/eurosat.yaml b/configs/datasets/eurosat.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88e9b8342a3215f136627806c951138b7ddd4aef --- /dev/null +++ b/configs/datasets/eurosat.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "EuroSAT" diff --git a/configs/datasets/fgvc_aircraft.yaml b/configs/datasets/fgvc_aircraft.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa72f469c9d228ecf6f1c89e965a5d9b788b3f6e --- /dev/null +++ b/configs/datasets/fgvc_aircraft.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "FGVCAircraft" diff --git a/configs/datasets/food101.yaml b/configs/datasets/food101.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9f8bfed4c46d8f2fc62447f3439275ce84b6285 --- /dev/null +++ b/configs/datasets/food101.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "Food101" diff --git a/configs/datasets/imagenet.yaml b/configs/datasets/imagenet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d1170ff671826c2027f51f18485eac69e1d070cd --- /dev/null +++ b/configs/datasets/imagenet.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "ImageNet" diff --git a/configs/datasets/imagenet_a.yaml b/configs/datasets/imagenet_a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ccd3b30ffbef74ac5ad985e2d29d3ef28bdb651b --- /dev/null +++ b/configs/datasets/imagenet_a.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "ImageNetA" diff --git a/configs/datasets/imagenet_r.yaml b/configs/datasets/imagenet_r.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f34269f7619443e5a0d38ecd916ba31297820373 --- /dev/null +++ b/configs/datasets/imagenet_r.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "ImageNetR" diff --git a/configs/datasets/imagenet_sketch.yaml b/configs/datasets/imagenet_sketch.yaml new file mode 100644 index 0000000000000000000000000000000000000000..feff22b0492bd3fe0b4f5baac1ad30122791bc0d --- /dev/null +++ b/configs/datasets/imagenet_sketch.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "ImageNetSketch" diff --git a/configs/datasets/imagenetv2.yaml b/configs/datasets/imagenetv2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9fcdc0f10b16986534e1dafe533d3ff66207c886 --- /dev/null +++ b/configs/datasets/imagenetv2.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "ImageNetV2" diff --git a/configs/datasets/oxford_flowers.yaml b/configs/datasets/oxford_flowers.yaml new file mode 100644 index 0000000000000000000000000000000000000000..769ecfa84e5ae273c249c8ce63adb24189217d8a --- /dev/null +++ b/configs/datasets/oxford_flowers.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "OxfordFlowers" \ No newline at end of file diff --git a/configs/datasets/oxford_pets.yaml b/configs/datasets/oxford_pets.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14ea5099cf884c922fb62066466abc6e6cac6f65 --- /dev/null +++ b/configs/datasets/oxford_pets.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "OxfordPets" \ No newline at end of file diff --git a/configs/datasets/stanford_cars.yaml b/configs/datasets/stanford_cars.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0362affb74a0c123ea384ea8fe2f77a96d12348c --- /dev/null 
+++ b/configs/datasets/stanford_cars.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "StanfordCars" diff --git a/configs/datasets/sun397.yaml b/configs/datasets/sun397.yaml new file mode 100644 index 0000000000000000000000000000000000000000..461bfb6b388e03c9ab0b5877a7caa88e2f35bfd8 --- /dev/null +++ b/configs/datasets/sun397.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "SUN397" diff --git a/configs/datasets/ucf101.yaml b/configs/datasets/ucf101.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5467a6d9c2ce16944cbf3b35f0ae3a57b3a77202 --- /dev/null +++ b/configs/datasets/ucf101.yaml @@ -0,0 +1,2 @@ +DATASET: + NAME: "UCF101" diff --git a/configs/trainers/CoCoOp/vit_b16_c16_ep10_batch1.yaml b/configs/trainers/CoCoOp/vit_b16_c16_ep10_batch1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b403143cd0167b044de17646038989e5dcfd73a3 --- /dev/null +++ b/configs/trainers/CoCoOp/vit_b16_c16_ep10_batch1.yaml @@ -0,0 +1,35 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 1 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 10 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + COCOOP: + N_CTX: 16 + CTX_INIT: "" + PREC: "fp16" \ No newline at end of file diff --git a/configs/trainers/CoCoOp/vit_b16_c4_ep10_batch1.yaml b/configs/trainers/CoCoOp/vit_b16_c4_ep10_batch1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..048d9a2f3a5e67ecd85a40e5088a3f89b730247b --- /dev/null +++ b/configs/trainers/CoCoOp/vit_b16_c4_ep10_batch1.yaml @@ -0,0 +1,35 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 1 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 10 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + COCOOP: + N_CTX: 4 + CTX_INIT: "" + PREC: "fp16" \ No newline at end of file diff --git a/configs/trainers/CoCoOp/vit_b16_c4_ep10_batch1_ctxv1.yaml b/configs/trainers/CoCoOp/vit_b16_c4_ep10_batch1_ctxv1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68998d87da944f78cd33492781f5c35c7589d4b8 --- /dev/null +++ b/configs/trainers/CoCoOp/vit_b16_c4_ep10_batch1_ctxv1.yaml @@ -0,0 +1,35 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 8 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 10 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + COCOOP: + N_CTX: 4 + CTX_INIT: "a photo of a" + PREC: "fp16" \ No newline at end of file diff --git a/configs/trainers/CoCoOp/vit_b16_c8_ep10_batch1.yaml 
b/configs/trainers/CoCoOp/vit_b16_c8_ep10_batch1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3349c1495842dcdc7cdf34f9aaf78052d180a5e2 --- /dev/null +++ b/configs/trainers/CoCoOp/vit_b16_c8_ep10_batch1.yaml @@ -0,0 +1,35 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 1 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 10 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + COCOOP: + N_CTX: 8 + CTX_INIT: "" + PREC: "fp16" \ No newline at end of file diff --git a/configs/trainers/CoOp/rn101.yaml b/configs/trainers/CoOp/rn101.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c67838f4f179620f4847f9fec95b9f9942a85d94 --- /dev/null +++ b/configs/trainers/CoOp/rn101.yaml @@ -0,0 +1,29 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 200 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "RN101" \ No newline at end of file diff --git a/configs/trainers/CoOp/rn101_ep50.yaml b/configs/trainers/CoOp/rn101_ep50.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a291895a382806955b5dc2e83ae29a15603d80c5 --- /dev/null +++ b/configs/trainers/CoOp/rn101_ep50.yaml @@ -0,0 +1,29 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "RN101" \ No newline at end of file diff --git a/configs/trainers/CoOp/rn50.yaml b/configs/trainers/CoOp/rn50.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7638b1bbf69990e9062eea96249c4a1ef6252e44 --- /dev/null +++ b/configs/trainers/CoOp/rn50.yaml @@ -0,0 +1,29 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 200 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "RN50" \ No newline at end of file diff --git a/configs/trainers/CoOp/rn50_ctxv1.yaml b/configs/trainers/CoOp/rn50_ctxv1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e902b566b75e6a63701699cb0bfa9b2c8c5b6f27 --- /dev/null +++ b/configs/trainers/CoOp/rn50_ctxv1.yaml @@ -0,0 
+1,33 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 200 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "RN50" + +TRAINER: + COOP: + CTX_INIT: "a photo of a" diff --git a/configs/trainers/CoOp/rn50_ep100.yaml b/configs/trainers/CoOp/rn50_ep100.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd0968f831c709b8985088e815a53c1c23b42b09 --- /dev/null +++ b/configs/trainers/CoOp/rn50_ep100.yaml @@ -0,0 +1,29 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 100 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "RN50" diff --git a/configs/trainers/CoOp/rn50_ep50.yaml b/configs/trainers/CoOp/rn50_ep50.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d7ef34ea7173d21a96af13b0ff8022583483721 --- /dev/null +++ b/configs/trainers/CoOp/rn50_ep50.yaml @@ -0,0 +1,29 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "RN50" \ No newline at end of file diff --git a/configs/trainers/CoOp/rn50_ep50_ctxv1.yaml b/configs/trainers/CoOp/rn50_ep50_ctxv1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bf06f917e53d50e747971a82a25eae2b239dff9 --- /dev/null +++ b/configs/trainers/CoOp/rn50_ep50_ctxv1.yaml @@ -0,0 +1,33 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "RN50" + +TRAINER: + COOP: + CTX_INIT: "a photo of a" \ No newline at end of file diff --git a/configs/trainers/CoOp/rn50_val.yaml b/configs/trainers/CoOp/rn50_val.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b0e97e62566f3d1b8a62cc57d11d9047846978a --- /dev/null +++ b/configs/trainers/CoOp/rn50_val.yaml @@ -0,0 +1,17 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 200 + TEST: + BATCH_SIZE: 200 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 
0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +MODEL: + BACKBONE: + NAME: "RN50" \ No newline at end of file diff --git a/configs/trainers/CoOp/vit_b16.yaml b/configs/trainers/CoOp/vit_b16.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bfbd36df632f3b651a1ac53639299dfd83c01cd0 --- /dev/null +++ b/configs/trainers/CoOp/vit_b16.yaml @@ -0,0 +1,29 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 200 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 50 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" \ No newline at end of file diff --git a/configs/trainers/CoOp/vit_b16_ep100.yaml b/configs/trainers/CoOp/vit_b16_ep100.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31bd7bcdf83974d8a599bfb93306a6e6c0a53e2c --- /dev/null +++ b/configs/trainers/CoOp/vit_b16_ep100.yaml @@ -0,0 +1,29 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 100 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" \ No newline at end of file diff --git a/configs/trainers/CoOp/vit_b16_ep50.yaml b/configs/trainers/CoOp/vit_b16_ep50.yaml new file mode 100644 index 0000000000000000000000000000000000000000..786b9805e31b981075573df343aa51ad25f22e2b --- /dev/null +++ b/configs/trainers/CoOp/vit_b16_ep50.yaml @@ -0,0 +1,29 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" \ No newline at end of file diff --git a/configs/trainers/CoOp/vit_b32.yaml b/configs/trainers/CoOp/vit_b32.yaml new file mode 100644 index 0000000000000000000000000000000000000000..991984066bbcb99bc5c1240b41e9d8714a11c80c --- /dev/null +++ b/configs/trainers/CoOp/vit_b32.yaml @@ -0,0 +1,29 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 200 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "ViT-B/32" \ No newline at end of file diff --git a/configs/trainers/CoOp/vit_b32_ep50.yaml b/configs/trainers/CoOp/vit_b32_ep50.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..cf9f8e1a110a6c2e5d5feffaab05e0a69f4f4e2e --- /dev/null +++ b/configs/trainers/CoOp/vit_b32_ep50.yaml @@ -0,0 +1,29 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "ViT-B/32" \ No newline at end of file diff --git a/configs/trainers/IVLP/vit_b16_c4_ep20_batch8_4+4ctx.yaml b/configs/trainers/IVLP/vit_b16_c4_ep20_batch8_4+4ctx.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ef9c76b02a51b125469cbc8c8db4ce7e8581016 --- /dev/null +++ b/configs/trainers/IVLP/vit_b16_c4_ep20_batch8_4+4ctx.yaml @@ -0,0 +1,39 @@ +# Independent Vision Language Prompting +DATALOADER: + TRAIN_X: + BATCH_SIZE: 8 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 20 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + IVLP: + N_CTX_VISION: 4 + N_CTX_TEXT: 4 + CTX_INIT: "a photo of a" + PREC: "fp16" + PROMPT_DEPTH_VISION: 9 + PROMPT_DEPTH_TEXT: 9 \ No newline at end of file diff --git a/configs/trainers/KgCoOp/rn50_ep100.yaml b/configs/trainers/KgCoOp/rn50_ep100.yaml new file mode 100644 index 0000000000000000000000000000000000000000..207dfad7b0636f64fbb220b3e21eb626665fb7ba --- /dev/null +++ b/configs/trainers/KgCoOp/rn50_ep100.yaml @@ -0,0 +1,33 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 256 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "adam" #"sgd" + LR: 0.002 #0.002 + MAX_EPOCH: 100 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "RN50" + +TRAINER: + COOP: + CTX_INIT: False diff --git a/configs/trainers/KgCoOp/rn50_ep100_b16.yaml b/configs/trainers/KgCoOp/rn50_ep100_b16.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93d9a90cfd72330d280cefd06072d38c491c1808 --- /dev/null +++ b/configs/trainers/KgCoOp/rn50_ep100_b16.yaml @@ -0,0 +1,33 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 16 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "adam" #"sgd" + LR: 0.002 #0.002 + MAX_EPOCH: 100 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "RN50" + +TRAINER: + COOP: + CTX_INIT: False diff --git a/configs/trainers/KgCoOp/vit_b16_ep100_ctxv1.yaml 
b/configs/trainers/KgCoOp/vit_b16_ep100_ctxv1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e52204093fc8cad1560033c90c698dbcd1cf396a --- /dev/null +++ b/configs/trainers/KgCoOp/vit_b16_ep100_ctxv1.yaml @@ -0,0 +1,34 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 500 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "adam" + LR: 0.002 + MAX_EPOCH: 100 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + #EPS: 1e-3 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + COOP: + CTX_INIT: False diff --git a/configs/trainers/KgCoOp/vit_b16_ep100_ctxv1_b128.yaml b/configs/trainers/KgCoOp/vit_b16_ep100_ctxv1_b128.yaml new file mode 100644 index 0000000000000000000000000000000000000000..792621afa5e1715b0bd96212d65cfa66b3a901ff --- /dev/null +++ b/configs/trainers/KgCoOp/vit_b16_ep100_ctxv1_b128.yaml @@ -0,0 +1,33 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 128 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "adam" + LR: 0.002 + MAX_EPOCH: 100 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + COOP: + CTX_INIT: True diff --git a/configs/trainers/KgCoOp/vit_b16_ep100_ctxv1_b16.yaml b/configs/trainers/KgCoOp/vit_b16_ep100_ctxv1_b16.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a3bfddc1be7fa895b004b0e896850e6c29589e3 --- /dev/null +++ b/configs/trainers/KgCoOp/vit_b16_ep100_ctxv1_b16.yaml @@ -0,0 +1,33 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 16 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "adam" + LR: 0.002 + MAX_EPOCH: 100 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + COOP: + CTX_INIT: False diff --git a/configs/trainers/KgCoOp/vit_b16_ep100_ctxv1_b8.yaml b/configs/trainers/KgCoOp/vit_b16_ep100_ctxv1_b8.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb144435cd9e414acd6de86ed0ab816361c83970 --- /dev/null +++ b/configs/trainers/KgCoOp/vit_b16_ep100_ctxv1_b8.yaml @@ -0,0 +1,33 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 8 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "adam" + LR: 0.002 + MAX_EPOCH: 100 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 10 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + COOP: + CTX_INIT: False diff --git a/configs/trainers/MaPLe/vit_b16_c2_ep20_batch8_2ctx.yaml 
b/configs/trainers/MaPLe/vit_b16_c2_ep20_batch8_2ctx.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f3889195ea7fa79056b1fe4a109e83b507b860d --- /dev/null +++ b/configs/trainers/MaPLe/vit_b16_c2_ep20_batch8_2ctx.yaml @@ -0,0 +1,36 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 8 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 20 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + MAPLE: + N_CTX: 2 + CTX_INIT: "a photo of a" + PREC: "fp16" + PROMPT_DEPTH: 9 diff --git a/configs/trainers/MaPLe/vit_b16_c2_ep5_batch4_2ctx.yaml b/configs/trainers/MaPLe/vit_b16_c2_ep5_batch4_2ctx.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eabc92cfb78a8514f130a853475074b513780be8 --- /dev/null +++ b/configs/trainers/MaPLe/vit_b16_c2_ep5_batch4_2ctx.yaml @@ -0,0 +1,40 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 4 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.0035 + MAX_EPOCH: 5 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TEST: + FINAL_MODEL: "best_val" + NO_TEST: False + +TRAINER: + MAPLE: + N_CTX: 2 + CTX_INIT: "a photo of a" + PREC: "fp16" + PROMPT_DEPTH: 9 diff --git a/configs/trainers/MaPLe/vit_b16_c2_ep5_batch4_2ctx_cross_datasets.yaml b/configs/trainers/MaPLe/vit_b16_c2_ep5_batch4_2ctx_cross_datasets.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f91f656daa1341d1702d6e2bad37647e4318842 --- /dev/null +++ b/configs/trainers/MaPLe/vit_b16_c2_ep5_batch4_2ctx_cross_datasets.yaml @@ -0,0 +1,36 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 4 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.0026 + MAX_EPOCH: 2 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + MAPLE: + N_CTX: 2 + CTX_INIT: "a photo of a" + PREC: "fp16" + PROMPT_DEPTH: 3 \ No newline at end of file diff --git a/configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx.yaml b/configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbe381d46ee1e0204bac300e3fc1f0044fc7b2c6 --- /dev/null +++ b/configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx.yaml @@ -0,0 +1,43 @@ +# PromptSRC: Prompting with Self-regularizing constraints +DATALOADER: + TRAIN_X: + BATCH_SIZE: 4 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", 
"random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.0025 + MAX_EPOCH: 20 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + PROMPTSRC: + N_CTX_VISION: 4 + N_CTX_TEXT: 4 + CTX_INIT: "a photo of a" + PREC: "fp16" + PROMPT_DEPTH_VISION: 9 + PROMPT_DEPTH_TEXT: 9 + TEXT_LOSS_WEIGHT: 25 + IMAGE_LOSS_WEIGHT: 10 + GPA_MEAN: 15 + GPA_STD: 1 diff --git a/configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets.yaml b/configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets.yaml new file mode 100644 index 0000000000000000000000000000000000000000..242c204b23cb9c78310c3c62559c85a5209488c2 --- /dev/null +++ b/configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets.yaml @@ -0,0 +1,43 @@ +# PromptSRC: Prompting with Self-regularizing constraints +DATALOADER: + TRAIN_X: + BATCH_SIZE: 8 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 20 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + PROMPTSRC: + N_CTX_VISION: 4 + N_CTX_TEXT: 4 + CTX_INIT: "a photo of a" + PREC: "fp16" + PROMPT_DEPTH_VISION: 3 + PROMPT_DEPTH_TEXT: 3 + TEXT_LOSS_WEIGHT: 25 + IMAGE_LOSS_WEIGHT: 10 + GPA_MEAN: 6 + GPA_STD: 10 diff --git a/configs/trainers/PromptSRC/vit_b16_c2_ep50_batch4_4+4ctx_few_shot.yaml b/configs/trainers/PromptSRC/vit_b16_c2_ep50_batch4_4+4ctx_few_shot.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9beefb44f501a2f3efad4c474e31aaebd84e1d3 --- /dev/null +++ b/configs/trainers/PromptSRC/vit_b16_c2_ep50_batch4_4+4ctx_few_shot.yaml @@ -0,0 +1,51 @@ +# PromptSRC: Prompting with Self-regularizing constraints +DATALOADER: + TRAIN_X: + BATCH_SIZE: 8 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +TEST: + FINAL_MODEL: "best_val" + NO_TEST: False + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + PROMPTSRC: + N_CTX_VISION: 4 + N_CTX_TEXT: 4 + CTX_INIT: "a photo of a" + PREC: "fp16" + PROMPT_DEPTH_VISION: 9 + PROMPT_DEPTH_TEXT: 9 + TEXT_LOSS_WEIGHT: 25 + IMAGE_LOSS_WEIGHT: 10 +# Use the below configuration for: ImageNet, Caltech101, OxfordPets, Food101, UCF101 and SUN397 + GPA_MEAN: 30 + GPA_STD: 30 +# Use the below configuration for: StanfordCars, Flowers102, FGVCAircraft, DTD and EuroSAT +# GPA_MEAN: 45 +# GPA_STD: 5 \ No newline at end of file diff --git a/configs/trainers/SuPr/hard_prompts/genertic_templates.yaml b/configs/trainers/SuPr/hard_prompts/genertic_templates.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29608da81dbdd61f9c3b0a54b72798443efec820 --- /dev/null +++ b/configs/trainers/SuPr/hard_prompts/genertic_templates.yaml @@ -0,0 +1,83 @@ +#PromptSRC +- "a photo of a {}." +- "a bad photo of a {}." 
+- "a photo of many {}." +- "a sculpture of a {}." +- "a photo of the hard to see {}." +- "a low resolution photo of the {}." +- "a rendering of a {}." +- "graffiti of a {}." +- "a bad photo of the {}." +- "a cropped photo of the {}." +- "a tattoo of a {}." +- "the embroidered {}." +- "a photo of a hard to see {}." +- "a bright photo of a {}." +- "a photo of a clean {}." +- "a photo of a dirty {}." +- "a dark photo of the {}." +- "a drawing of a {}." +- "a photo of my {}." +- "the plastic {}." +- "a photo of the cool {}." +- "a close-up photo of a {}." +- "a black and white photo of the {}." +- "a painting of the {}." +- "a painting of a {}." +- "a pixelated photo of the {}." +- "a sculpture of the {}." +- "a bright photo of the {}." +- "a cropped photo of a {}." +- "a plastic {}." +- "a photo of the dirty {}." +- "a jpeg corrupted photo of a {}." +- "a blurry photo of the {}." +- "a photo of the {}." +- "a good photo of the {}." +- "a rendering of the {}." +- "a {} in a video game." +- "a photo of one {}." +- "a doodle of a {}." +- "a close-up photo of the {}." +- "the origami {}." +- "the {} in a video game." +- "a sketch of a {}." +- "a doodle of the {}." +- "a origami {}." +- "a low resolution photo of a {}." +- "the toy {}." +- "a rendition of the {}." +- "a photo of the clean {}." +- "a photo of a large {}." +- "a rendition of a {}." +- "a photo of a nice {}." +- "a photo of a weird {}." +- "a blurry photo of a {}." +- "a cartoon {}." +- "art of a {}." +- "a sketch of the {}." +- "a embroidered {}." +- "a pixelated photo of a {}." +- "itap of the {}." + +# - "a jpeg corrupted photo of the {}." +# - "a good photo of a {}." +# - "a plushie {}." +# - "a photo of the nice {}." +# - "a photo of the small {}." +# - "a photo of the weird {}." +# - "the cartoon {}." +# - "art of the {}." +# - "a drawing of the {}." +# - "a photo of the large {}." +# - "a black and white photo of a {}." +# - "the plushie {}." +# - "a dark photo of a {}." +# - "itap of a {}." +# - "graffiti of the {}." +# - "a toy {}." +# - "itap of my {}." +# - "a photo of a cool {}." +# - "a photo of a small {}." +# - "a tattoo of the {}." 
+ diff --git a/configs/trainers/SuPr/hard_prompts/init.yaml b/configs/trainers/SuPr/hard_prompts/init.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2b92a2a4f3d9841fcac84235286d94e9e72086e --- /dev/null +++ b/configs/trainers/SuPr/hard_prompts/init.yaml @@ -0,0 +1,19 @@ + +- "a photo of many" +- "a sculpture of a" +- "a rendering of a" +- "a tattoo of a" +- "a drawing of a" +- "a photo of my" +- "a painting of the" +- "a painting of a" +- "a sculpture of the" +- "a photo of the" +- "a rendering of the" +- "a photo of one" +- "a doodle of a" +- "a sketch of a" +- "a doodle of the" +- "a rendition of the" +- "a rendition of a" +- "a sketch of the" diff --git a/configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml b/configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b46634994525f5a4839af5129c1fe1dd847b898 --- /dev/null +++ b/configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml @@ -0,0 +1,45 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 4 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.0025 + MAX_EPOCH: 10 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + SUPR: + N_CTX_VISION: 4 + N_CTX_TEXT: 4 + CTX_INIT: "a photo of a" + PREC: "fp16" + PROMPT_DEPTH_VISION: 9 + PROMPT_DEPTH_TEXT: 9 + SPACE_DIM: 7 + ENSEMBLE_NUM: 3 + REG_LOSS_WEIGHT: 60 + LAMBDA: 0.7 + HARD_PROMPT_PATH: "configs/trainers/SuPr/hard_prompts/" + TRAINER_BACKBONE: "SuPr" + diff --git a/configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml b/configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c894a3af17e67a5de2179264335621f6a599165 --- /dev/null +++ b/configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml @@ -0,0 +1,47 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 8 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 10 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + CHECKPOINT_FREQ: 1 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + SUPR: + N_CTX_VISION: 4 + N_CTX_TEXT: 4 + CTX_INIT: "a photo of a" + PREC: "fp16" + PROMPT_DEPTH_VISION: 3 + PROMPT_DEPTH_TEXT: 3 + SPACE_DIM: 2 + ENSEMBLE_NUM: 3 + REG_LOSS_WEIGHT: 60 + LAMBDA: 0.5 + HARD_PROMPT_PATH: "configs/trainers/SuPr/hard_prompts/" + TRAINER_BACKBONE: "SuPr" + SVD: False # use least square save gpu memory(mathematical equivalent) + diff --git a/configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml b/configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36380a42171e33b002168a8393649f48a6e5415d --- /dev/null +++ b/configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml @@ -0,0 +1,57 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 4 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + 
SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.0025 + MAX_EPOCH: 20 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + SUPR: + N_CTX_VISION: 4 + N_CTX_TEXT: 4 + CTX_INIT: "a photo of a" + PREC: "fp16" + PROMPT_DEPTH_VISION: 9 + PROMPT_DEPTH_TEXT: 9 + SPACE_DIM: 8 + ENSEMBLE_NUM: 3 + REG_LOSS_WEIGHT: 45 + LAMBDA: 0.7 + HARD_PROMPT_PATH: "configs/trainers/SuPr/hard_prompts/" + TRAINER_BACKBONE: "SuPr" + SVD: False # use least square save gpu memory(mathematical equivalent) + + PROMPTSRC: + N_CTX_VISION: 4 + N_CTX_TEXT: 4 + CTX_INIT: "a photo of a" + PREC: "fp16" + PROMPT_DEPTH_VISION: 9 + PROMPT_DEPTH_TEXT: 9 + TEXT_LOSS_WEIGHT: 7 + IMAGE_LOSS_WEIGHT: 7 + GPA_MEAN: 15 + GPA_STD: 1 diff --git a/configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml b/configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e3003968331039b2396f67898adfa3ad244c567 --- /dev/null +++ b/configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml @@ -0,0 +1,49 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 8 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.005 + MAX_EPOCH: 25 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 20 + +TEST: + FINAL_MODEL: "best_val" + NO_TEST: False + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + SUPR: + N_CTX_VISION: 4 + N_CTX_TEXT: 4 + CTX_INIT: "a photo of a" + PREC: "fp16" + PROMPT_DEPTH_VISION: 9 + PROMPT_DEPTH_TEXT: 9 + SPACE_DIM: 7 + ENSEMBLE_NUM: 3 + REG_LOSS_WEIGHT: 60 + LAMBDA: 0.7 + HARD_PROMPT_PATH: "configs/trainers/SuPr/hard_prompts/" + TRAINER_BACKBONE: "SuPr" + SVD: False # use least square save gpu memory(mathematical equivalent) diff --git a/configs/trainers/TCP/vit_b16_ep100_ctxv1.yaml b/configs/trainers/TCP/vit_b16_ep100_ctxv1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..715f2ef09dc42029bcdf55076d1f9211f11b025b --- /dev/null +++ b/configs/trainers/TCP/vit_b16_ep100_ctxv1.yaml @@ -0,0 +1,33 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 4 + TEST: + BATCH_SIZE: 500 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + +OPTIM: + NAME: "sgd" + LR: 0.0025 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 + +TRAIN: + PRINT_FREQ: 5 + +MODEL: + BACKBONE: + NAME: "ViT-B/16" + +TRAINER: + TCP: + CTX_INIT: "a photo of a" diff --git a/datasets/__init__.py b/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/datasets/__pycache__/__init__.cpython-38.pyc b/datasets/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..caf3203b71aab26eb5419f36c12157f9c085abc9 Binary files 
/dev/null and b/datasets/__pycache__/__init__.cpython-38.pyc differ diff --git a/datasets/__pycache__/caltech101.cpython-38.pyc b/datasets/__pycache__/caltech101.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a2dfb0b85f504d5dd789588497473ff5cb491ad Binary files /dev/null and b/datasets/__pycache__/caltech101.cpython-38.pyc differ diff --git a/datasets/__pycache__/dtd.cpython-38.pyc b/datasets/__pycache__/dtd.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3ea39297e93b0f361d4254df9d0bf5dbf00727d Binary files /dev/null and b/datasets/__pycache__/dtd.cpython-38.pyc differ diff --git a/datasets/__pycache__/eurosat.cpython-38.pyc b/datasets/__pycache__/eurosat.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fec675a843271c254fefe08b3fc83eaebbab084 Binary files /dev/null and b/datasets/__pycache__/eurosat.cpython-38.pyc differ diff --git a/datasets/__pycache__/fgvc_aircraft.cpython-38.pyc b/datasets/__pycache__/fgvc_aircraft.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..231504b59bf50b89b546574da463fbbab2087752 Binary files /dev/null and b/datasets/__pycache__/fgvc_aircraft.cpython-38.pyc differ diff --git a/datasets/__pycache__/food101.cpython-38.pyc b/datasets/__pycache__/food101.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3002f60306e69daaa3d4af1f5b9998f711d6c88 Binary files /dev/null and b/datasets/__pycache__/food101.cpython-38.pyc differ diff --git a/datasets/__pycache__/imagenet.cpython-38.pyc b/datasets/__pycache__/imagenet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c0e0b0438b10d9c0ed5739d1a59fb69adc20b3d3 Binary files /dev/null and b/datasets/__pycache__/imagenet.cpython-38.pyc differ diff --git a/datasets/__pycache__/imagenet_a.cpython-38.pyc b/datasets/__pycache__/imagenet_a.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc966ad708dba0e763547c64c4f66a6e55a4f194 Binary files /dev/null and b/datasets/__pycache__/imagenet_a.cpython-38.pyc differ diff --git a/datasets/__pycache__/imagenet_r.cpython-38.pyc b/datasets/__pycache__/imagenet_r.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0bffc4f3f5181cf3e20326e4794cd47448f3ad95 Binary files /dev/null and b/datasets/__pycache__/imagenet_r.cpython-38.pyc differ diff --git a/datasets/__pycache__/imagenet_sketch.cpython-38.pyc b/datasets/__pycache__/imagenet_sketch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72b49fad76e38536aa2c73c9e645206f9135d856 Binary files /dev/null and b/datasets/__pycache__/imagenet_sketch.cpython-38.pyc differ diff --git a/datasets/__pycache__/imagenetv2.cpython-38.pyc b/datasets/__pycache__/imagenetv2.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e5e41d88fbd79afb05a725f09348e5fabb22607 Binary files /dev/null and b/datasets/__pycache__/imagenetv2.cpython-38.pyc differ diff --git a/datasets/__pycache__/oxford_flowers.cpython-38.pyc b/datasets/__pycache__/oxford_flowers.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2f940af47364df9c44ad755d4b44f7f5d0ab45e Binary files /dev/null and b/datasets/__pycache__/oxford_flowers.cpython-38.pyc differ diff --git a/datasets/__pycache__/oxford_pets.cpython-38.pyc b/datasets/__pycache__/oxford_pets.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..2bdfa22c004476b0d74c2b23e229f41d1e67e7ed Binary files /dev/null and b/datasets/__pycache__/oxford_pets.cpython-38.pyc differ diff --git a/datasets/__pycache__/stanford_cars.cpython-38.pyc b/datasets/__pycache__/stanford_cars.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1492959d14a348dc7b04bd643eb23adaf812da16 Binary files /dev/null and b/datasets/__pycache__/stanford_cars.cpython-38.pyc differ diff --git a/datasets/__pycache__/sun397.cpython-38.pyc b/datasets/__pycache__/sun397.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d85aba8973337527834598a29eef3f2570a674fa Binary files /dev/null and b/datasets/__pycache__/sun397.cpython-38.pyc differ diff --git a/datasets/__pycache__/ucf101.cpython-38.pyc b/datasets/__pycache__/ucf101.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6731128f6232e5dc6f143af598303b16743b8a0b Binary files /dev/null and b/datasets/__pycache__/ucf101.cpython-38.pyc differ diff --git a/datasets/caltech101.py b/datasets/caltech101.py new file mode 100644 index 0000000000000000000000000000000000000000..4ab07ddbbf61ef7556322567a37a72392cd3b007 --- /dev/null +++ b/datasets/caltech101.py @@ -0,0 +1,62 @@ +import os +import pickle + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import mkdir_if_missing + +from .oxford_pets import OxfordPets +from .dtd import DescribableTextures as DTD + +IGNORED = ["BACKGROUND_Google", "Faces_easy"] +NEW_CNAMES = { + "airplanes": "airplane", + "Faces": "face", + "Leopards": "leopard", + "Motorbikes": "motorbike", +} + + +@DATASET_REGISTRY.register() +class Caltech101(DatasetBase): + + dataset_dir = "caltech-101" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.image_dir = os.path.join(self.dataset_dir, "101_ObjectCategories") + self.split_path = os.path.join(self.dataset_dir, "split_zhou_Caltech101.json") + self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") + mkdir_if_missing(self.split_fewshot_dir) + + if os.path.exists(self.split_path): + train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) + else: + train, val, test = DTD.read_and_split_data(self.image_dir, ignored=IGNORED, new_cnames=NEW_CNAMES) + OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) + + num_shots = cfg.DATASET.NUM_SHOTS + if num_shots >= 1: + seed = cfg.SEED + preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") + + if os.path.exists(preprocessed): + print(f"Loading preprocessed few-shot data from {preprocessed}") + with open(preprocessed, "rb") as file: + data = pickle.load(file) + train, val = data["train"], data["val"] + else: + train = self.generate_fewshot_dataset(train, num_shots=num_shots) + val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) + data = {"train": train, "val": val} + print(f"Saving preprocessed few-shot data to {preprocessed}") + with open(preprocessed, "wb") as file: + pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) + + subsample = cfg.DATASET.SUBSAMPLE_CLASSES + + train, _, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) + super().__init__(train_x=train, val=test, test=test) + + # if cfg.TRAINER.NAME == "SuPr": + self.all_classnames = OxfordPets.get_all_classnames(train, val, test) \ No newline at end 
of file diff --git a/datasets/dtd.py b/datasets/dtd.py new file mode 100644 index 0000000000000000000000000000000000000000..376ec5e2574f8686a1a81bf8ae9bde52ef02b26e --- /dev/null +++ b/datasets/dtd.py @@ -0,0 +1,97 @@ +import os +import pickle +import random + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import listdir_nohidden, mkdir_if_missing + +from .oxford_pets import OxfordPets + + +@DATASET_REGISTRY.register() +class DescribableTextures(DatasetBase): + + dataset_dir = "dtd" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.image_dir = os.path.join(self.dataset_dir, "images") + self.split_path = os.path.join(self.dataset_dir, "split_zhou_DescribableTextures.json") + self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") + mkdir_if_missing(self.split_fewshot_dir) + + if os.path.exists(self.split_path): + train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) + else: + train, val, test = self.read_and_split_data(self.image_dir) + OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) + + num_shots = cfg.DATASET.NUM_SHOTS + if num_shots >= 1: + seed = cfg.SEED + preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") + + if os.path.exists(preprocessed): + print(f"Loading preprocessed few-shot data from {preprocessed}") + with open(preprocessed, "rb") as file: + data = pickle.load(file) + train, val = data["train"], data["val"] + else: + train = self.generate_fewshot_dataset(train, num_shots=num_shots) + val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) + data = {"train": train, "val": val} + print(f"Saving preprocessed few-shot data to {preprocessed}") + with open(preprocessed, "wb") as file: + pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) + + subsample = cfg.DATASET.SUBSAMPLE_CLASSES + train, _, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) + super().__init__(train_x=train, val=test, test=test) + + # if cfg.TRAINER.NAME == "SuPr": + self.all_classnames = OxfordPets.get_all_classnames(train, val, test) + + @staticmethod + def read_and_split_data(image_dir, p_trn=0.5, p_val=0.2, ignored=[], new_cnames=None): + # The data are supposed to be organized into the following structure + # ============= + # images/ + # dog/ + # cat/ + # horse/ + # ============= + categories = listdir_nohidden(image_dir) + categories = [c for c in categories if c not in ignored] + categories.sort() + + p_tst = 1 - p_trn - p_val + print(f"Splitting into {p_trn:.0%} train, {p_val:.0%} val, and {p_tst:.0%} test") + + def _collate(ims, y, c): + items = [] + for im in ims: + item = Datum(impath=im, label=y, classname=c) # is already 0-based + items.append(item) + return items + + train, val, test = [], [], [] + for label, category in enumerate(categories): + category_dir = os.path.join(image_dir, category) + images = listdir_nohidden(category_dir) + images = [os.path.join(category_dir, im) for im in images] + random.shuffle(images) + n_total = len(images) + n_train = round(n_total * p_trn) + n_val = round(n_total * p_val) + n_test = n_total - n_train - n_val + assert n_train > 0 and n_val > 0 and n_test > 0 + + if new_cnames is not None and category in new_cnames: + category = new_cnames[category] + + train.extend(_collate(images[:n_train], label, category)) + val.extend(_collate(images[n_train : n_train + n_val], label, 
category)) + test.extend(_collate(images[n_train + n_val :], label, category)) + + return train, val, test diff --git a/datasets/eurosat.py b/datasets/eurosat.py new file mode 100644 index 0000000000000000000000000000000000000000..c7f26857240053aa19c724852a24cd743df5520b --- /dev/null +++ b/datasets/eurosat.py @@ -0,0 +1,86 @@ +import os +import pickle + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import mkdir_if_missing + +from .oxford_pets import OxfordPets +from .dtd import DescribableTextures as DTD + +NEW_CNAMES = { + "AnnualCrop": "Annual Crop Land", + "Forest": "Forest", + "HerbaceousVegetation": "Herbaceous Vegetation Land", + "Highway": "Highway or Road", + "Industrial": "Industrial Buildings", + "Pasture": "Pasture Land", + "PermanentCrop": "Permanent Crop Land", + "Residential": "Residential Buildings", + "River": "River", + "SeaLake": "Sea or Lake", +} + + +@DATASET_REGISTRY.register() +class EuroSAT(DatasetBase): + + dataset_dir = "eurosat" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.image_dir = os.path.join(self.dataset_dir, "2750") + self.split_path = os.path.join(self.dataset_dir, "split_zhou_EuroSAT.json") + self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") + mkdir_if_missing(self.split_fewshot_dir) + + if os.path.exists(self.split_path): + train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) + else: + train, val, test = DTD.read_and_split_data(self.image_dir, new_cnames=NEW_CNAMES) + OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) + + num_shots = cfg.DATASET.NUM_SHOTS + if num_shots >= 1: + seed = cfg.SEED + preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") + + if os.path.exists(preprocessed): + print(f"Loading preprocessed few-shot data from {preprocessed}") + with open(preprocessed, "rb") as file: + data = pickle.load(file) + train, val = data["train"], data["val"] + else: + train = self.generate_fewshot_dataset(train, num_shots=num_shots) + val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) + data = {"train": train, "val": val} + print(f"Saving preprocessed few-shot data to {preprocessed}") + with open(preprocessed, "wb") as file: + pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) + + subsample = cfg.DATASET.SUBSAMPLE_CLASSES + + if cfg.TRAINER.NAME == "PromptKD": + if cfg.TRAINER.MODAL == "base2novel": + train_x, _, _ = OxfordPets.subsample_classes(train, val, test, subsample='all') + _, _, test_base = OxfordPets.subsample_classes(train, val, test, subsample='base') + _, _, test_novel = OxfordPets.subsample_classes(train, val, test, subsample='new') + super().__init__(train_x=train_x, val=test_base, test=test_novel) + elif cfg.TRAINER.MODAL == "cross": + train, _, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) + super().__init__(train_x=train, val=test, test=test) + else: + train, _, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) + super().__init__(train_x=train, val=test, test=test) + + # if cfg.TRAINER.NAME == "SuPr": + self.all_classnames = OxfordPets.get_all_classnames(train, val, test) + + def update_classname(self, dataset_old): + dataset_new = [] + for item_old in dataset_old: + cname_old = item_old.classname + cname_new = NEW_CNAMES[cname_old] + item_new = Datum(impath=item_old.impath, label=item_old.label,
classname=cname_new) + dataset_new.append(item_new) + return dataset_new diff --git a/datasets/fgvc_aircraft.py b/datasets/fgvc_aircraft.py new file mode 100644 index 0000000000000000000000000000000000000000..d93ac50fc17ea0a8903bad65988bf04226a68b87 --- /dev/null +++ b/datasets/fgvc_aircraft.py @@ -0,0 +1,74 @@ +import os +import pickle + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import mkdir_if_missing + +from .oxford_pets import OxfordPets + + +@DATASET_REGISTRY.register() +class FGVCAircraft(DatasetBase): + + dataset_dir = "fgvc_aircraft" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.image_dir = os.path.join(self.dataset_dir, "images") + self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") + mkdir_if_missing(self.split_fewshot_dir) + + classnames = [] + with open(os.path.join(self.dataset_dir, "variants.txt"), "r") as f: + lines = f.readlines() + for line in lines: + classnames.append(line.strip()) + cname2lab = {c: i for i, c in enumerate(classnames)} + + train = self.read_data(cname2lab, "images_variant_train.txt") + val = self.read_data(cname2lab, "images_variant_val.txt") + test = self.read_data(cname2lab, "images_variant_test.txt") + + num_shots = cfg.DATASET.NUM_SHOTS + if num_shots >= 1: + seed = cfg.SEED + preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") + + if os.path.exists(preprocessed): + print(f"Loading preprocessed few-shot data from {preprocessed}") + with open(preprocessed, "rb") as file: + data = pickle.load(file) + train, val = data["train"], data["val"] + else: + train = self.generate_fewshot_dataset(train, num_shots=num_shots) + val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) + data = {"train": train, "val": val} + print(f"Saving preprocessed few-shot data to {preprocessed}") + with open(preprocessed, "wb") as file: + pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) + + subsample = cfg.DATASET.SUBSAMPLE_CLASSES + + train, _, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) + super().__init__(train_x=train, val=test, test=test) + + # if cfg.TRAINER.NAME == "SuPr": + self.all_classnames = OxfordPets.get_all_classnames(train, val, test) + + def read_data(self, cname2lab, split_file): + filepath = os.path.join(self.dataset_dir, split_file) + items = [] + + with open(filepath, "r") as f: + lines = f.readlines() + for line in lines: + line = line.strip().split(" ") + imname = line[0] + ".jpg" + classname = " ".join(line[1:]) + impath = os.path.join(self.image_dir, imname) + label = cname2lab[classname] + item = Datum(impath=impath, label=label, classname=classname) + items.append(item) + + return items diff --git a/datasets/food101.py b/datasets/food101.py new file mode 100644 index 0000000000000000000000000000000000000000..60d212c434ffb511bb743d5e53076d01e03037df --- /dev/null +++ b/datasets/food101.py @@ -0,0 +1,54 @@ +import os +import pickle + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import mkdir_if_missing + +from .oxford_pets import OxfordPets +from .dtd import DescribableTextures as DTD + + +@DATASET_REGISTRY.register() +class Food101(DatasetBase): + + dataset_dir = "food-101" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.image_dir = 
os.path.join(self.dataset_dir, "images") + self.split_path = os.path.join(self.dataset_dir, "split_zhou_Food101.json") + self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") + mkdir_if_missing(self.split_fewshot_dir) + + if os.path.exists(self.split_path): + train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) + else: + train, val, test = DTD.read_and_split_data(self.image_dir) + OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) + + num_shots = cfg.DATASET.NUM_SHOTS + if num_shots >= 1: + seed = cfg.SEED + preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") + + if os.path.exists(preprocessed): + print(f"Loading preprocessed few-shot data from {preprocessed}") + with open(preprocessed, "rb") as file: + data = pickle.load(file) + train, val = data["train"], data["val"] + else: + train = self.generate_fewshot_dataset(train, num_shots=num_shots) + val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) + data = {"train": train, "val": val} + print(f"Saving preprocessed few-shot data to {preprocessed}") + with open(preprocessed, "wb") as file: + pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) + + subsample = cfg.DATASET.SUBSAMPLE_CLASSES + + train, _, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) + super().__init__(train_x=train, val=test, test=test) + + # if cfg.TRAINER.NAME == "SuPr": + self.all_classnames = OxfordPets.get_all_classnames(train, val, test) \ No newline at end of file diff --git a/datasets/imagenet.py b/datasets/imagenet.py new file mode 100644 index 0000000000000000000000000000000000000000..84e500ebe9926e05349f5dc025f7f0aaa864fb50 --- /dev/null +++ b/datasets/imagenet.py @@ -0,0 +1,97 @@ +import os +import pickle +from collections import OrderedDict + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import listdir_nohidden, mkdir_if_missing + +from .oxford_pets import OxfordPets + + +@DATASET_REGISTRY.register() +class ImageNet(DatasetBase): + + dataset_dir = "imagenet" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + + # self.dataset_dir = '/root/imagenet/' + self.image_dir = os.path.join(self.dataset_dir, "images") + self.preprocessed = os.path.join(self.dataset_dir, "preprocessed.pkl") + self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") + mkdir_if_missing(self.split_fewshot_dir) + + if os.path.exists(self.preprocessed): + with open(self.preprocessed, "rb") as f: + preprocessed = pickle.load(f) + train = preprocessed["train"] + test = preprocessed["test"] + else: + text_file = os.path.join(self.dataset_dir, "classnames.txt") + classnames = self.read_classnames(text_file) + train = self.read_data(classnames, "train") + # Follow standard practice to perform evaluation on the val set + # Also used as the val set (so evaluate the last-step model) + test = self.read_data(classnames, "val") + + preprocessed = {"train": train, "test": test} + with open(self.preprocessed, "wb") as f: + pickle.dump(preprocessed, f, protocol=pickle.HIGHEST_PROTOCOL) + + num_shots = cfg.DATASET.NUM_SHOTS + print(f"num_shots is {num_shots}") + if num_shots >= 1: + seed = cfg.SEED + preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") + + if os.path.exists(preprocessed): + print(f"Loading preprocessed few-shot data from {preprocessed}") + with 
open(preprocessed, "rb") as file: + data = pickle.load(file) + train = data["train"] + else: + train = self.generate_fewshot_dataset(train, num_shots=num_shots) + data = {"train": train} + print(f"Saving preprocessed few-shot data to {preprocessed}") + with open(preprocessed, "wb") as file: + pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) + + subsample = cfg.DATASET.SUBSAMPLE_CLASSES + train, test = OxfordPets.subsample_classes(train, test, subsample=subsample) + super().__init__(train_x=train, val=test, test=test) + + + # if cfg.TRAINER.NAME == "SuPr": + _,self.all_classnames = self.get_lab2cname(train) + + @staticmethod + def read_classnames(text_file): # 得到所有的class name + """Return a dictionary containing + key-value pairs of : . + """ + classnames = OrderedDict() + with open(text_file, "r") as f: + lines = f.readlines() + for line in lines: + line = line.strip().split(" ") + folder = line[0] + classname = " ".join(line[1:]) + classnames[folder] = classname + return classnames # 是一个array,数值为foldname: classname + + def read_data(self, classnames, split_dir): + split_dir = os.path.join(self.image_dir, split_dir) # 分别对应train, val + folders = sorted(f.name for f in os.scandir(split_dir) if f.is_dir()) + items = [] + + for label, folder in enumerate(folders): + imnames = listdir_nohidden(os.path.join(split_dir, folder)) + classname = classnames[folder] + for imname in imnames: + impath = os.path.join(split_dir, folder, imname) + item = Datum(impath=impath, label=label, classname=classname) + items.append(item) + + return items diff --git a/datasets/imagenet_a.py b/datasets/imagenet_a.py new file mode 100644 index 0000000000000000000000000000000000000000..e013ec69c2bd1f469cd6309556be284044841ced --- /dev/null +++ b/datasets/imagenet_a.py @@ -0,0 +1,47 @@ +import os + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import listdir_nohidden + +from .imagenet import ImageNet + +TO_BE_IGNORED = ["README.txt"] + + +@DATASET_REGISTRY.register() +class ImageNetA(DatasetBase): + """ImageNet-A(dversarial). + + This dataset is used for testing only. 
+ """ + + dataset_dir = "imagenet-adversarial" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.image_dir = os.path.join(self.dataset_dir, "imagenet-a") + + text_file = os.path.join(self.dataset_dir, "classnames.txt") + classnames = ImageNet.read_classnames(text_file) + + data = self.read_data(classnames) + # if cfg.TRAINER.NAME == "SuPr": + _,self.all_classnames = self.get_lab2cname(data) + super().__init__(train_x=data, test=data) + + def read_data(self, classnames): + image_dir = self.image_dir + folders = listdir_nohidden(image_dir, sort=True) + folders = [f for f in folders if f not in TO_BE_IGNORED] + items = [] + + for label, folder in enumerate(folders): + imnames = listdir_nohidden(os.path.join(image_dir, folder)) + classname = classnames[folder] + for imname in imnames: + impath = os.path.join(image_dir, folder, imname) + item = Datum(impath=impath, label=label, classname=classname) + items.append(item) + + return items diff --git a/datasets/imagenet_r.py b/datasets/imagenet_r.py new file mode 100644 index 0000000000000000000000000000000000000000..2170d814e2aa116e1afc49d40e6287d920fdc721 --- /dev/null +++ b/datasets/imagenet_r.py @@ -0,0 +1,47 @@ +import os + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import listdir_nohidden + +from .imagenet import ImageNet + +TO_BE_IGNORED = ["README.txt"] + + +@DATASET_REGISTRY.register() +class ImageNetR(DatasetBase): + """ImageNet-R(endition). + + This dataset is used for testing only. + """ + + dataset_dir = "imagenet-rendition" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.image_dir = os.path.join(self.dataset_dir, "imagenet-r") + + text_file = os.path.join(self.dataset_dir, "classnames.txt") + classnames = ImageNet.read_classnames(text_file) + + data = self.read_data(classnames) + # if cfg.TRAINER.NAME == "SuPr": + _,self.all_classnames = self.get_lab2cname(data) + super().__init__(train_x=data, test=data) + + def read_data(self, classnames): + image_dir = self.image_dir + folders = listdir_nohidden(image_dir, sort=True) + folders = [f for f in folders if f not in TO_BE_IGNORED] + items = [] + + for label, folder in enumerate(folders): + imnames = listdir_nohidden(os.path.join(image_dir, folder)) + classname = classnames[folder] + for imname in imnames: + impath = os.path.join(image_dir, folder, imname) + item = Datum(impath=impath, label=label, classname=classname) + items.append(item) + + return items diff --git a/datasets/imagenet_sketch.py b/datasets/imagenet_sketch.py new file mode 100644 index 0000000000000000000000000000000000000000..3343aae5f8bcb7a8825a7ced335384ab7ee2d20e --- /dev/null +++ b/datasets/imagenet_sketch.py @@ -0,0 +1,44 @@ +import os + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import listdir_nohidden + +from .imagenet import ImageNet + + +@DATASET_REGISTRY.register() +class ImageNetSketch(DatasetBase): + """ImageNet-Sketch. + + This dataset is used for testing only. 
+ """ + + dataset_dir = "imagenet-sketch" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.image_dir = os.path.join(self.dataset_dir, "images") + + text_file = os.path.join(self.dataset_dir, "classnames.txt") + classnames = ImageNet.read_classnames(text_file) + + data = self.read_data(classnames) + # if cfg.TRAINER.NAME == "SuPr": + _,self.all_classnames = self.get_lab2cname(data) + super().__init__(train_x=data, test=data) + + def read_data(self, classnames): + image_dir = self.image_dir + folders = listdir_nohidden(image_dir, sort=True) + items = [] + + for label, folder in enumerate(folders): + imnames = listdir_nohidden(os.path.join(image_dir, folder)) + classname = classnames[folder] + for imname in imnames: + impath = os.path.join(image_dir, folder, imname) + item = Datum(impath=impath, label=label, classname=classname) + items.append(item) + + return items diff --git a/datasets/imagenetv2.py b/datasets/imagenetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..ffab62188d51e334205a7a6141aad49d17224932 --- /dev/null +++ b/datasets/imagenetv2.py @@ -0,0 +1,47 @@ +import os + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import listdir_nohidden + +from .imagenet import ImageNet + + +@DATASET_REGISTRY.register() +class ImageNetV2(DatasetBase): + """ImageNetV2. + + This dataset is used for testing only. + """ + + dataset_dir = "imagenetv2" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + image_dir = "imagenetv2-matched-frequency-format-val" + self.image_dir = os.path.join(self.dataset_dir, image_dir) + + text_file = os.path.join(self.dataset_dir, "classnames.txt") + classnames = ImageNet.read_classnames(text_file) + + data = self.read_data(classnames) + # if cfg.TRAINER.NAME == "SuPr": + _,self.all_classnames = self.get_lab2cname(data) + super().__init__(train_x=data, test=data) + + def read_data(self, classnames): + image_dir = self.image_dir + folders = list(classnames.keys()) + items = [] + + for label in range(1000): + class_dir = os.path.join(image_dir, str(label)) + imnames = listdir_nohidden(class_dir) + folder = folders[label] + classname = classnames[folder] + for imname in imnames: + impath = os.path.join(class_dir, imname) + item = Datum(impath=impath, label=label, classname=classname) + items.append(item) + + return items diff --git a/datasets/oxford_flowers.py b/datasets/oxford_flowers.py new file mode 100644 index 0000000000000000000000000000000000000000..7012b1c66b554263bad7309d9e376cfadf05fe4d --- /dev/null +++ b/datasets/oxford_flowers.py @@ -0,0 +1,91 @@ +import os +import pickle +import random +from scipy.io import loadmat +from collections import defaultdict + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import read_json, mkdir_if_missing + +from .oxford_pets import OxfordPets + + +@DATASET_REGISTRY.register() +class OxfordFlowers(DatasetBase): + + dataset_dir = "oxford_flowers" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.image_dir = os.path.join(self.dataset_dir, "jpg") + self.label_file = os.path.join(self.dataset_dir, "imagelabels.mat") + self.lab2cname_file = os.path.join(self.dataset_dir, "cat_to_name.json") + self.split_path = 
os.path.join(self.dataset_dir, "split_zhou_OxfordFlowers.json") + self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") + mkdir_if_missing(self.split_fewshot_dir) + + if os.path.exists(self.split_path): + train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) + else: + train, val, test = self.read_data() + OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) + + num_shots = cfg.DATASET.NUM_SHOTS + if num_shots >= 1: + seed = cfg.SEED + preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") + + if os.path.exists(preprocessed): + print(f"Loading preprocessed few-shot data from {preprocessed}") + with open(preprocessed, "rb") as file: + data = pickle.load(file) + train, val = data["train"], data["val"] + else: + train = self.generate_fewshot_dataset(train, num_shots=num_shots) + val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) + data = {"train": train, "val": val} + print(f"Saving preprocessed few-shot data to {preprocessed}") + with open(preprocessed, "wb") as file: + pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) + + subsample = cfg.DATASET.SUBSAMPLE_CLASSES + train, _, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) + super().__init__(train_x=train, val=test, test=test) + + # if cfg.TRAINER.NAME == "SuPr": + self.all_classnames = OxfordPets.get_all_classnames(train, val, test) + + def read_data(self): + tracker = defaultdict(list) + label_file = loadmat(self.label_file)["labels"][0] + for i, label in enumerate(label_file): + imname = f"image_{str(i + 1).zfill(5)}.jpg" + impath = os.path.join(self.image_dir, imname) + label = int(label) + tracker[label].append(impath) + + print("Splitting data into 50% train, 20% val, and 30% test") + + def _collate(ims, y, c): + items = [] + for im in ims: + item = Datum(impath=im, label=y - 1, classname=c) # convert to 0-based label + items.append(item) + return items + + lab2cname = read_json(self.lab2cname_file) + train, val, test = [], [], [] + for label, impaths in tracker.items(): + random.shuffle(impaths) + n_total = len(impaths) + n_train = round(n_total * 0.5) + n_val = round(n_total * 0.2) + n_test = n_total - n_train - n_val + assert n_train > 0 and n_val > 0 and n_test > 0 + cname = lab2cname[str(label)] + train.extend(_collate(impaths[:n_train], label, cname)) + val.extend(_collate(impaths[n_train : n_train + n_val], label, cname)) + test.extend(_collate(impaths[n_train + n_val :], label, cname)) + + return train, val, test diff --git a/datasets/oxford_pets.py b/datasets/oxford_pets.py new file mode 100644 index 0000000000000000000000000000000000000000..fd379d2e20d6bab3be26aef65b49cbed89bd8223 --- /dev/null +++ b/datasets/oxford_pets.py @@ -0,0 +1,196 @@ +import os +import pickle +import math +import random +from collections import defaultdict + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import read_json, write_json, mkdir_if_missing + + +@DATASET_REGISTRY.register() +class OxfordPets(DatasetBase): + + dataset_dir = "oxford_pets" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.image_dir = os.path.join(self.dataset_dir, "images") + self.anno_dir = os.path.join(self.dataset_dir, "annotations") + self.split_path = os.path.join(self.dataset_dir, "split_zhou_OxfordPets.json") + self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") 
+ mkdir_if_missing(self.split_fewshot_dir) + + if os.path.exists(self.split_path): + train, val, test = self.read_split(self.split_path, self.image_dir) + else: + trainval = self.read_data(split_file="trainval.txt") + test = self.read_data(split_file="test.txt") + train, val = self.split_trainval(trainval) + self.save_split(train, val, test, self.split_path, self.image_dir) + + num_shots = cfg.DATASET.NUM_SHOTS + if num_shots >= 1: + seed = cfg.SEED + preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") + + if os.path.exists(preprocessed): + print(f"Loading preprocessed few-shot data from {preprocessed}") + with open(preprocessed, "rb") as file: + data = pickle.load(file) + train, val = data["train"], data["val"] + else: + train = self.generate_fewshot_dataset(train, num_shots=num_shots) + val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) + data = {"train": train, "val": val} + print(f"Saving preprocessed few-shot data to {preprocessed}") + with open(preprocessed, "wb") as file: + pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) + + subsample = cfg.DATASET.SUBSAMPLE_CLASSES + train, _, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) + super().__init__(train_x=train, val=test, test=test) + + # if cfg.TRAINER.NAME == "SuPr": + self.all_classnames = OxfordPets.get_all_classnames(train, val, test) + + def read_data(self, split_file): + filepath = os.path.join(self.anno_dir, split_file) + items = [] + + with open(filepath, "r") as f: + lines = f.readlines() + for line in lines: + line = line.strip() + imname, label, species, _ = line.split(" ") + breed = imname.split("_")[:-1] + breed = "_".join(breed) + breed = breed.lower() + imname += ".jpg" + impath = os.path.join(self.image_dir, imname) + label = int(label) - 1 # convert to 0-based index + item = Datum(impath=impath, label=label, classname=breed) + items.append(item) + + return items + + @staticmethod + def split_trainval(trainval, p_val=0.2): + p_trn = 1 - p_val + print(f"Splitting trainval into {p_trn:.0%} train and {p_val:.0%} val") + tracker = defaultdict(list) + for idx, item in enumerate(trainval): + label = item.label + tracker[label].append(idx) + + train, val = [], [] + for label, idxs in tracker.items(): + n_val = round(len(idxs) * p_val) + assert n_val > 0 + random.shuffle(idxs) + for n, idx in enumerate(idxs): + item = trainval[idx] + if n < n_val: + val.append(item) + else: + train.append(item) + + return train, val + + @staticmethod + def save_split(train, val, test, filepath, path_prefix): + def _extract(items): + out = [] + for item in items: + impath = item.impath + label = item.label + classname = item.classname + impath = impath.replace(path_prefix, "") + if impath.startswith("/"): + impath = impath[1:] + out.append((impath, label, classname)) + return out + + train = _extract(train) + val = _extract(val) + test = _extract(test) + + split = {"train": train, "val": val, "test": test} + + write_json(split, filepath) + print(f"Saved split to {filepath}") + + @staticmethod + def read_split(filepath, path_prefix): + def _convert(items): + out = [] + for impath, label, classname in items: + impath = os.path.join(path_prefix, impath) + item = Datum(impath=impath, label=int(label), classname=classname) + out.append(item) + return out + + print(f"Reading split from {filepath}") + split = read_json(filepath) + train = _convert(split["train"]) + val = _convert(split["val"]) + test = _convert(split["test"]) + + return train, val, test + + 
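Note: `save_split` and `read_split` above round-trip a small JSON file with exactly three keys, where each entry is an `[image path, label, classname]` triple and image paths are stored relative to the dataset's image directory. A minimal sketch of that layout (the file name and example entry below are illustrative, not taken from the repo):

```python
# Illustrative only: inspect a split file of the kind written by
# OxfordPets.save_split(). The file name and example values are assumptions.
import json

with open("split_zhou_OxfordPets.json") as f:
    split = json.load(f)

# split has exactly the keys "train", "val" and "test"
impath, label, classname = split["train"][0]
# e.g. impath = "Abyssinian_100.jpg", label = 0, classname = "abyssinian"
```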
@staticmethod + def subsample_classes(*args, subsample="all"): + """Divide classes into two groups. The first group + represents base classes while the second group represents + new classes. + + Args: + args: a list of datasets, e.g. train, val and test. + subsample (str): what classes to subsample. + """ + assert subsample in ["all", "base", "new"] + + if subsample == "all": + return args + + dataset = args[0] + labels = set() + for item in dataset: + labels.add(item.label) + labels = list(labels) + labels.sort() + n = len(labels) + # Divide classes into two halves + m = math.ceil(n / 2) + + print(f"SUBSAMPLE {subsample.upper()} CLASSES!") + if subsample == "base": + selected = labels[:m] # take the first half + else: + selected = labels[m:] # take the second half + relabeler = {y: y_new for y_new, y in enumerate(selected)} + + output = [] + for dataset in args: + dataset_new = [] + for item in dataset: + if item.label not in selected: + continue + item_new = Datum( + impath=item.impath, + label=relabeler[item.label], + classname=item.classname + ) + dataset_new.append(item_new) + output.append(dataset_new) + + return output + + @staticmethod + def get_all_classnames(*args): + classnames = [] + for dataset in args: + for item in dataset: + classnames.append(item.classname) + return list(set(classnames)) \ No newline at end of file diff --git a/datasets/stanford_cars.py b/datasets/stanford_cars.py new file mode 100644 index 0000000000000000000000000000000000000000..a0802216571663b89b6bc3ec2076d7b9a30d6f5b --- /dev/null +++ b/datasets/stanford_cars.py @@ -0,0 +1,77 @@ +import os +import pickle +from scipy.io import loadmat + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import mkdir_if_missing + +from .oxford_pets import OxfordPets + + +@DATASET_REGISTRY.register() +class StanfordCars(DatasetBase): + + dataset_dir = "stanford_cars" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.split_path = os.path.join(self.dataset_dir, "split_zhou_StanfordCars.json") + self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") + mkdir_if_missing(self.split_fewshot_dir) + + if os.path.exists(self.split_path): + train, val, test = OxfordPets.read_split(self.split_path, self.dataset_dir) + else: + trainval_file = os.path.join(self.dataset_dir, "devkit", "cars_train_annos.mat") + test_file = os.path.join(self.dataset_dir, "cars_test_annos_withlabels.mat") + meta_file = os.path.join(self.dataset_dir, "devkit", "cars_meta.mat") + trainval = self.read_data("cars_train", trainval_file, meta_file) + test = self.read_data("cars_test", test_file, meta_file) + train, val = OxfordPets.split_trainval(trainval) + OxfordPets.save_split(train, val, test, self.split_path, self.dataset_dir) + + num_shots = cfg.DATASET.NUM_SHOTS + if num_shots >= 1: + seed = cfg.SEED + preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") + + if os.path.exists(preprocessed): + print(f"Loading preprocessed few-shot data from {preprocessed}") + with open(preprocessed, "rb") as file: + data = pickle.load(file) + train, val = data["train"], data["val"] + else: + train = self.generate_fewshot_dataset(train, num_shots=num_shots) + val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) + data = {"train": train, "val": val} + print(f"Saving preprocessed few-shot data to {preprocessed}") + with open(preprocessed, "wb") as file: + 
pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) + + subsample = cfg.DATASET.SUBSAMPLE_CLASSES + train, _, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) + super().__init__(train_x=train, val=test, test=test) + + # if cfg.TRAINER.NAME == "SuPr": + self.all_classnames = OxfordPets.get_all_classnames(train, val, test) + + def read_data(self, image_dir, anno_file, meta_file): + anno_file = loadmat(anno_file)["annotations"][0] + meta_file = loadmat(meta_file)["class_names"][0] + items = [] + + for i in range(len(anno_file)): + imname = anno_file[i]["fname"][0] + impath = os.path.join(self.dataset_dir, image_dir, imname) + label = anno_file[i]["class"][0, 0] + label = int(label) - 1 # convert to 0-based index + classname = meta_file[label][0] + names = classname.split(" ") + year = names.pop(-1) + names.insert(0, year) + classname = " ".join(names) + item = Datum(impath=impath, label=label, classname=classname) + items.append(item) + + return items diff --git a/datasets/sun397.py b/datasets/sun397.py new file mode 100644 index 0000000000000000000000000000000000000000..5ea8ab375378fe392bc0ef5331fd4a1ad04ca72b --- /dev/null +++ b/datasets/sun397.py @@ -0,0 +1,82 @@ +import os +import pickle + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import mkdir_if_missing + +from .oxford_pets import OxfordPets + + +@DATASET_REGISTRY.register() +class SUN397(DatasetBase): + + dataset_dir = "sun397" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.image_dir = os.path.join(self.dataset_dir, "SUN397") + self.split_path = os.path.join(self.dataset_dir, "split_zhou_SUN397.json") + self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") + mkdir_if_missing(self.split_fewshot_dir) + + if os.path.exists(self.split_path): + train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) + else: + classnames = [] + with open(os.path.join(self.dataset_dir, "ClassName.txt"), "r") as f: + lines = f.readlines() + for line in lines: + line = line.strip()[1:] # remove / + classnames.append(line) + cname2lab = {c: i for i, c in enumerate(classnames)} + trainval = self.read_data(cname2lab, "Training_01.txt") + test = self.read_data(cname2lab, "Testing_01.txt") + train, val = OxfordPets.split_trainval(trainval) + OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) + + num_shots = cfg.DATASET.NUM_SHOTS + if num_shots >= 1: + seed = cfg.SEED + preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") + + if os.path.exists(preprocessed): + print(f"Loading preprocessed few-shot data from {preprocessed}") + with open(preprocessed, "rb") as file: + data = pickle.load(file) + train, val = data["train"], data["val"] + else: + train = self.generate_fewshot_dataset(train, num_shots=num_shots) + val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) + data = {"train": train, "val": val} + print(f"Saving preprocessed few-shot data to {preprocessed}") + with open(preprocessed, "wb") as file: + pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) + + subsample = cfg.DATASET.SUBSAMPLE_CLASSES + train, _, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) + super().__init__(train_x=train, val=test, test=test) + + # if cfg.TRAINER.NAME == "SuPr": + self.all_classnames = OxfordPets.get_all_classnames(train, val, test) + + def read_data(self, 
cname2lab, text_file): + text_file = os.path.join(self.dataset_dir, text_file) + items = [] + + with open(text_file, "r") as f: + lines = f.readlines() + for line in lines: + imname = line.strip()[1:] # remove / + classname = os.path.dirname(imname) + label = cname2lab[classname] + impath = os.path.join(self.image_dir, imname) + + names = classname.split("/")[1:] # remove 1st letter + names = names[::-1] # put words like indoor/outdoor at first + classname = " ".join(names) + + item = Datum(impath=impath, label=label, classname=classname) + items.append(item) + + return items diff --git a/datasets/ucf101.py b/datasets/ucf101.py new file mode 100644 index 0000000000000000000000000000000000000000..cb37e1ddc74e39882198a292ca74eb0f12ce2d01 --- /dev/null +++ b/datasets/ucf101.py @@ -0,0 +1,86 @@ +import os +import pickle +import re + +from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase +from dassl.utils import mkdir_if_missing + +from .oxford_pets import OxfordPets + + +@DATASET_REGISTRY.register() +class UCF101(DatasetBase): + + dataset_dir = "ucf101" + + def __init__(self, cfg): + root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) + self.dataset_dir = os.path.join(root, self.dataset_dir) + self.image_dir = os.path.join(self.dataset_dir, "UCF-101-midframes") + self.split_path = os.path.join(self.dataset_dir, "split_zhou_UCF101.json") + self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot") + mkdir_if_missing(self.split_fewshot_dir) + + if os.path.exists(self.split_path): + train, val, test = OxfordPets.read_split(self.split_path, self.image_dir) + else: + cname2lab = {} + filepath = os.path.join(self.dataset_dir, "ucfTrainTestlist/classInd.txt") + with open(filepath, "r") as f: + lines = f.readlines() + for line in lines: + label, classname = line.strip().split(" ") + label = int(label) - 1 # conver to 0-based index + cname2lab[classname] = label + + trainval = self.read_data(cname2lab, "ucfTrainTestlist/trainlist01.txt") + test = self.read_data(cname2lab, "ucfTrainTestlist/testlist01.txt") + train, val = OxfordPets.split_trainval(trainval) + OxfordPets.save_split(train, val, test, self.split_path, self.image_dir) + + num_shots = cfg.DATASET.NUM_SHOTS + if num_shots >= 1: + seed = cfg.SEED + preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl") + + if os.path.exists(preprocessed): + print(f"Loading preprocessed few-shot data from {preprocessed}") + with open(preprocessed, "rb") as file: + data = pickle.load(file) + train, val = data["train"], data["val"] + else: + train = self.generate_fewshot_dataset(train, num_shots=num_shots) + val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4)) + data = {"train": train, "val": val} + print(f"Saving preprocessed few-shot data to {preprocessed}") + with open(preprocessed, "wb") as file: + pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL) + + subsample = cfg.DATASET.SUBSAMPLE_CLASSES + train, _, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample) + super().__init__(train_x=train, val=test, test=test) + + # if cfg.TRAINER.NAME == "SuPr": + self.all_classnames = OxfordPets.get_all_classnames(train, val, test) + + def read_data(self, cname2lab, text_file): + text_file = os.path.join(self.dataset_dir, text_file) + items = [] + + with open(text_file, "r") as f: + lines = f.readlines() + for line in lines: + line = line.strip().split(" ")[0] # trainlist: filename, label + action, filename = line.split("/") + label = cname2lab[action] + + 
elements = re.findall("[A-Z][^A-Z]*", action) + renamed_action = "_".join(elements) + + filename = filename.replace(".avi", ".jpg") + impath = os.path.join(self.image_dir, renamed_action, filename) + + item = Datum(impath=impath, label=label, classname=renamed_action) + items.append(item) + + return items diff --git a/docs/DATASETS.md b/docs/DATASETS.md new file mode 100644 index 0000000000000000000000000000000000000000..33ed7c21d186ca5a526f9c4023b5397255b9ce3c --- /dev/null +++ b/docs/DATASETS.md @@ -0,0 +1,233 @@ +# How to install datasets + +### Acknowledgement: This readme file for installing datasets has been borrowed directly from [MaPLe's](https://github.com/muzairkhattak/multimodal-prompt-learning) official repository. + +We recommend putting all datasets under the same folder (say `$DATA`) to ease management and following the instructions below to organize datasets to avoid modifying the source code. The file structure should look like: + +``` +$DATA/ +|–– imagenet/ +|–– caltech-101/ +|–– oxford_pets/ +|–– stanford_cars/ +``` + +If you have some datasets already installed somewhere else, you can create symbolic links in `$DATA/dataset_name` that point to the original data to avoid duplicate download. + +Datasets list: +- [ImageNet](#imagenet) +- [Caltech101](#caltech101) +- [OxfordPets](#oxfordpets) +- [StanfordCars](#stanfordcars) +- [Flowers102](#flowers102) +- [Food101](#food101) +- [FGVCAircraft](#fgvcaircraft) +- [SUN397](#sun397) +- [DTD](#dtd) +- [EuroSAT](#eurosat) +- [UCF101](#ucf101) +- [ImageNetV2](#imagenetv2) +- [ImageNet-Sketch](#imagenet-sketch) +- [ImageNet-A](#imagenet-a) +- [ImageNet-R](#imagenet-r) + +The instructions to prepare each dataset are detailed below. To ensure reproducibility and fair comparison for future work, we provide fixed train/val/test splits for all datasets except ImageNet where the validation set is used as test set. The fixed splits are either from the original datasets (if available) or created by us. + +### ImageNet +- Create a folder named `imagenet/` under `$DATA`. +- Create `images/` under `imagenet/`. +- Download the dataset from the [official website](https://image-net.org/index.php) and extract the training and validation sets to `$DATA/imagenet/images`. The directory structure should look like +``` +imagenet/ +|–– images/ +| |–– train/ # contains 1,000 folders like n01440764, n01443537, etc. +| |–– val/ +``` +- If you had downloaded the ImageNet dataset before, you can create symbolic links to map the training and validation sets to `$DATA/imagenet/images`. +- Download the `classnames.txt` to `$DATA/imagenet/` from this [link](https://drive.google.com/file/d/1-61f_ol79pViBFDG_IDlUQSwoLcn2XXF/view?usp=sharing). The class names are copied from [CLIP](https://github.com/openai/CLIP/blob/main/notebooks/Prompt_Engineering_for_ImageNet.ipynb). + +### Caltech101 +- Create a folder named `caltech-101/` under `$DATA`. +- Download `101_ObjectCategories.tar.gz` from http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz and extract the file under `$DATA/caltech-101`. +- Download `split_zhou_Caltech101.json` from this [link](https://drive.google.com/file/d/1hyarUivQE36mY6jSomru6Fjd-JzwcCzN/view?usp=sharing) and put it under `$DATA/caltech-101`. + +The directory structure should look like +``` +caltech-101/ +|–– 101_ObjectCategories/ +|–– split_zhou_Caltech101.json +``` + +### OxfordPets +- Create a folder named `oxford_pets/` under `$DATA`. 
+- Download the images from https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz. +- Download the annotations from https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz. +- Download `split_zhou_OxfordPets.json` from this [link](https://drive.google.com/file/d/1501r8Ber4nNKvmlFVQZ8SeUHTcdTTEqs/view?usp=sharing). + +The directory structure should look like +``` +oxford_pets/ +|–– images/ +|–– annotations/ +|–– split_zhou_OxfordPets.json +``` + +### StanfordCars +- Create a folder named `stanford_cars/` under `$DATA`. +- Download the train images http://ai.stanford.edu/~jkrause/car196/cars_train.tgz. +- Download the test images http://ai.stanford.edu/~jkrause/car196/cars_test.tgz. +- Download the train labels https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz. +- Download the test labels http://ai.stanford.edu/~jkrause/car196/cars_test_annos_withlabels.mat. +- Download `split_zhou_StanfordCars.json` from this [link](https://drive.google.com/file/d/1ObCFbaAgVu0I-k_Au-gIUcefirdAuizT/view?usp=sharing). + +The directory structure should look like +``` +stanford_cars/ +|–– cars_test\ +|–– cars_test_annos_withlabels.mat +|–– cars_train\ +|–– devkit\ +|–– split_zhou_StanfordCars.json +``` + +### Flowers102 +- Create a folder named `oxford_flowers/` under `$DATA`. +- Download the images and labels from https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz and https://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat respectively. +- Download `cat_to_name.json` from [here](https://drive.google.com/file/d/1AkcxCXeK_RCGCEC_GvmWxjcjaNhu-at0/view?usp=sharing). +- Download `split_zhou_OxfordFlowers.json` from [here](https://drive.google.com/file/d/1Pp0sRXzZFZq15zVOzKjKBu4A9i01nozT/view?usp=sharing). + +The directory structure should look like +``` +oxford_flowers/ +|–– cat_to_name.json +|–– imagelabels.mat +|–– jpg/ +|–– split_zhou_OxfordFlowers.json +``` + +### Food101 +- Download the dataset from https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/ and extract the file `food-101.tar.gz` under `$DATA`, resulting in a folder named `$DATA/food-101/`. +- Download `split_zhou_Food101.json` from [here](https://drive.google.com/file/d/1QK0tGi096I0Ba6kggatX1ee6dJFIcEJl/view?usp=sharing). + +The directory structure should look like +``` +food-101/ +|–– images/ +|–– license_agreement.txt +|–– meta/ +|–– README.txt +|–– split_zhou_Food101.json +``` + +### FGVCAircraft +- Download the data from https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/archives/fgvc-aircraft-2013b.tar.gz. +- Extract `fgvc-aircraft-2013b.tar.gz` and keep only `data/`. +- Move `data/` to `$DATA` and rename the folder to `fgvc_aircraft/`. + +The directory structure should look like +``` +fgvc_aircraft/ +|–– images/ +|–– ... # a bunch of .txt files +``` + +### SUN397 +- Create a folder named `sun397/` under `$DATA`. +- Download the images http://vision.princeton.edu/projects/2010/SUN/SUN397.tar.gz. +- Download the partitions https://vision.princeton.edu/projects/2010/SUN/download/Partitions.zip. +- Extract these files under `$DATA/sun397/`. +- Download `split_zhou_SUN397.json` from this [link](https://drive.google.com/file/d/1y2RD81BYuiyvebdN-JymPfyWYcd8_MUq/view?usp=sharing). + +The directory structure should look like +``` +sun397/ +|–– SUN397/ +|–– split_zhou_SUN397.json +|–– ... # a bunch of .txt files +``` + +### DTD +- Download the dataset from https://www.robots.ox.ac.uk/~vgg/data/dtd/download/dtd-r1.0.1.tar.gz and extract it to `$DATA`. This should lead to `$DATA/dtd/`. 
+- Download `split_zhou_DescribableTextures.json` from this [link](https://drive.google.com/file/d/1u3_QfB467jqHgNXC00UIzbLZRQCg2S7x/view?usp=sharing). + +The directory structure should look like +``` +dtd/ +|–– images/ +|–– imdb/ +|–– labels/ +|–– split_zhou_DescribableTextures.json +``` + +### EuroSAT +- Create a folder named `eurosat/` under `$DATA`. +- Download the dataset from http://madm.dfki.de/files/sentinel/EuroSAT.zip and extract it to `$DATA/eurosat/`. +- Download `split_zhou_EuroSAT.json` from [here](https://drive.google.com/file/d/1Ip7yaCWFi0eaOFUGga0lUdVi_DDQth1o/view?usp=sharing). + +The directory structure should look like +``` +eurosat/ +|–– 2750/ +|–– split_zhou_EuroSAT.json +``` + +### UCF101 +- Create a folder named `ucf101/` under `$DATA`. +- Download the zip file `UCF-101-midframes.zip` from [here](https://drive.google.com/file/d/10Jqome3vtUA2keJkNanAiFpgbyC9Hc2O/view?usp=sharing) and extract it to `$DATA/ucf101/`. This zip file contains the extracted middle video frames. +- Download `split_zhou_UCF101.json` from this [link](https://drive.google.com/file/d/1I0S0q91hJfsV9Gf4xDIjgDq4AqBNJb1y/view?usp=sharing). + +The directory structure should look like +``` +ucf101/ +|–– UCF-101-midframes/ +|–– split_zhou_UCF101.json +``` + +### ImageNetV2 +- Create a folder named `imagenetv2/` under `$DATA`. +- Go to this github repo https://github.com/modestyachts/ImageNetV2. +- Download the matched-frequency dataset from https://s3-us-west-2.amazonaws.com/imagenetv2public/imagenetv2-matched-frequency.tar.gz and extract it to `$DATA/imagenetv2/`. +- Copy `$DATA/imagenet/classnames.txt` to `$DATA/imagenetv2/`. + +The directory structure should look like +``` +imagenetv2/ +|–– imagenetv2-matched-frequency-format-val/ +|–– classnames.txt +``` + +### ImageNet-Sketch +- Download the dataset from https://github.com/HaohanWang/ImageNet-Sketch. +- Extract the dataset to `$DATA/imagenet-sketch`. +- Copy `$DATA/imagenet/classnames.txt` to `$DATA/imagenet-sketch/`. + +The directory structure should look like +``` +imagenet-sketch/ +|–– images/ # contains 1,000 folders whose names have the format of n* +|–– classnames.txt +``` + +### ImageNet-A +- Create a folder named `imagenet-adversarial/` under `$DATA`. +- Download the dataset from https://github.com/hendrycks/natural-adv-examples and extract it to `$DATA/imagenet-adversarial/`. +- Copy `$DATA/imagenet/classnames.txt` to `$DATA/imagenet-adversarial/`. + +The directory structure should look like +``` +imagenet-adversarial/ +|–– imagenet-a/ # contains 200 folders whose names have the format of n* +|–– classnames.txt +``` + +### ImageNet-R +- Create a folder named `imagenet-rendition/` under `$DATA`. +- Download the dataset from https://github.com/hendrycks/imagenet-r and extract it to `$DATA/imagenet-rendition/`. +- Copy `$DATA/imagenet/classnames.txt` to `$DATA/imagenet-rendition/`. + +The directory structure should look like +``` +imagenet-rendition/ +|–– imagenet-r/ # contains 200 folders whose names have the format of n* +|–– classnames.txt +``` \ No newline at end of file diff --git a/docs/EVAL.md b/docs/EVAL.md new file mode 100644 index 0000000000000000000000000000000000000000..8ba0fb0344169935b5ba2411e8043bd69c164dca --- /dev/null +++ b/docs/EVAL.md @@ -0,0 +1,115 @@ + +# 📑 Evaluating and Reproducing SuPr Results + +We provide ready-to-use bash scripts under the [scripts/](../scripts) directory for evaluating **SuPr**, **SuPr+PromptSRC**, and **SuPrEns** models using our pre-trained checkpoints. 
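+
+Before launching them, it can help to sanity-check that the datasets prepared following DATASETS.md are laid out as expected. The snippet below is a minimal sketch (it is not part of the released code): it loads one of the `split_zhou_*.json` files and prints basic statistics, assuming the `[impath, label, classname]` entry format written by `OxfordPets.save_split` in `datasets/oxford_pets.py`; the `DATA` path is a placeholder to replace with your own dataset root.
+
+```python
+import json
+import os
+
+DATA = os.path.expanduser("~/data")  # placeholder: set this to your $DATA root
+split_file = os.path.join(DATA, "caltech-101", "split_zhou_Caltech101.json")
+
+with open(split_file, "r") as f:
+    split = json.load(f)
+
+# Each split is a list of [impath, label, classname] entries.
+for name in ("train", "val", "test"):
+    items = split[name]
+    classes = {classname for _, _, classname in items}
+    print(f"{name}: {len(items)} images, {len(classes)} classes")
+```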
+ +Please ensure that you update the `DATA` variable in each script to match your dataset path, and run all commands from the project root directory `SuPr/`. + +We have already provided: +- Precomputed evaluation results under [output/](../output) +- Aggregated and summarized statistics under [parse_results/](../parse_results) + +Below, we guide you through reproducing these results by yourself. + +--- + +## 🔥 SuPr Reproduction Guide + +We now explain how to reproduce our reported results step-by-step. + +--- + +### 🛠️ Preliminary Setup + +To reproduce the results (taking ImageNet as an example), follow these steps: + +1. **Create the environment and install dependencies** + - Follow the instructions in [INSTALL.md](../docs/INSTALL.md) to set up the environment and install the `Dassl.pytorch` library. + +2. **Prepare datasets** + - Follow the dataset preparation guidelines provided in [DATASETS.md](../docs/DATASETS.md). + +3. **Download pre-trained weights** + - Download the zipped folder containing all pre-trained weights from this [link](https://mbzuaiac-my.sharepoint.com/:f:/g/personal/syed_wasim_mbzuai_ac_ae/Em_3tkSj6T9AmhVjmzKTL3gBYNehhvfJl8ke2pU3U0nabA?e=9ecjQA). + - After extraction, the folder structure should look like: + +```text +weights/ +|–– SuPr/ +| |–– base2new/ +| | |–– imagenet/ +| | |–– shots_16/ +| | |–– seed1/ +| | |–– seed2/ +| | |–– seed3/ +| ... +| |–– cross_dg/ +| |–– fewshot/ +| +|–– SubspacePromptSRC/ +| |–– base2new/ +| ... +| +|–– SuPrEns/ +| |–– base2new/ +| ... +``` + +> **Important:** +> If you place the `weights/` folder outside the `Subspace_Prompting/` root directory, +> remember to update the `${WEIGHTSPATH}` variable inside the following scripts: +> - `scripts/supr/reproduce_base2novel_setting.sh` +> - `scripts/supr/reproduce_fewshot.sh` +> - `scripts/supr/reproduce_xd.sh` +> - `scripts/supr_src/reproduce_base2novel_setting.sh` +> - `scripts/supr_ens/reproduce_base2novel_setting.sh` + +--- + +### ⚡ Reproducing Experiments + +After setting up, run the following command from the `SuPr/` root directory: + +```bash +bash reproduce.sh +``` + +This command will automatically start evaluation across all settings, using the provided pre-trained models. + +The evaluation logs and results will be saved under the `output/` directory. + +--- + +### 📈 Aggregating Results + +After running evaluations, you can aggregate the results across seeds and tasks by running: + +```bash +# Base-to-Novel Evaluation Results + +# SuPr +python parse_test_res.py -type base2new output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16 + +# SuPr+PromptSRC +python parse_test_res.py -type base2new output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16 + +# SuPr Ensemble +python parse_test_res.py -type base2new output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16 + + +# Cross-Dataset Generalization Results +python parse_test_res.py -type cross output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16 + +# Domain Generalization Results +python parse_test_res.py -type dg output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16 +``` + +The aggregated results will be automatically compiled into Excel spreadsheets for easy reporting. + +--- + +> **Tip:** If you want to evaluate on other datasets beyond ImageNet, simply adjust the dataset names and paths accordingly in the scripts. 
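+
+For a rough cross-check of the aggregated numbers, the sketch below (a simplified stand-in, not the repository's `parse_test_res.py`) scans the per-seed `log.txt` files and averages the final test accuracy. It assumes the `* accuracy: XX.X%` line format that appears in the evaluation logs under `output/`.
+
+```python
+import glob
+import re
+import statistics
+
+# Evaluation outputs for one dataset (path taken from the example above).
+root = "output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16"
+pattern = re.compile(r"\* accuracy: ([\d.]+)%")
+
+accs = []
+for log_path in sorted(glob.glob(f"{root}/seed*/log.txt")):
+    with open(log_path, "r") as f:
+        found = pattern.findall(f.read())
+    if found:
+        accs.append(float(found[-1]))  # last match = final test accuracy
+
+if accs:
+    print(f"{len(accs)} seeds, mean accuracy = {statistics.mean(accs):.2f}%")
+else:
+    print("No accuracy lines found; check the path and log format.")
+```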
+
+> **Warning:** Ensure that datasets are correctly prepared and accessible by the scripts; otherwise evaluation may fail.
+
+---
diff --git a/docs/INSTALL.md b/docs/INSTALL.md
new file mode 100644
index 0000000000000000000000000000000000000000..c7269470132596ac1ac71044973fb1f0e7e24615
--- /dev/null
+++ b/docs/INSTALL.md
@@ -0,0 +1,44 @@
+# Installation
+
+### Acknowledgement: This installation readme is modified from [MaPLe's](https://github.com/muzairkhattak/multimodal-prompt-learning) official repository.
+
+This codebase is tested on Ubuntu 20.04.2 LTS with Python 3.8. Follow the steps below to create the environment and install the dependencies.
+
+* Set up a conda environment (recommended).
+```bash
+# Create a conda environment
+conda create -y -n SuPr python=3.8
+
+# Activate the environment
+conda activate SuPr
+
+# Install torch (requires version >= 1.8.1) and torchvision
+# Please refer to https://pytorch.org/ if you need a different CUDA version
+pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113
+```
+
+* Clone the SuPr code repository and install the requirements
+```bash
+# Clone the SuPr code base
+git clone https://github.com/PRIS-CV/Subspace-Prompting
+
+cd Subspace-Prompting/
+# Install requirements
+
+
+```
+
+* Install the dassl library.
+```bash
+# Instructions borrowed from https://github.com/KaiyangZhou/Dassl.pytorch#installation
+
+# Clone this repo
+# original source: https://github.com/KaiyangZhou/Dassl.pytorch.git
+cd Dassl.pytorch/
+
+# Install dependencies
+pip install -r requirements.txt
+
+# Install this library (no need to re-build if the source code is modified)
+python setup.py develop
+```
diff --git a/docs/TRAIN.md b/docs/TRAIN.md
new file mode 100644
index 0000000000000000000000000000000000000000..038226c38272bc314c3536f73fe886563fdb5e1e
--- /dev/null
+++ b/docs/TRAIN.md
@@ -0,0 +1,188 @@
+
+# 🚀 Running SuPr
+
+This section provides detailed instructions on running **SuPr** experiments across different scenarios: base-to-novel transfer, cross-dataset/domain generalization, and few-shot learning.
+
+---
+
+# 📋 Table of Contents
+- [🚀 Running SuPr](#-running-supr)
+- [📋 Table of Contents](#-table-of-contents)
+  - [🖥️ GPU and Memory Requirements](#️-gpu-and-memory-requirements)
+  - [(1) 🏆 Base-to-Novel Experiments](#1--base-to-novel-experiments)
+    - [Step-by-Step Instructions](#step-by-step-instructions)
+    - [🔥 SuPr + PromptSRC](#-supr--promptsrc)
+  - [(2) 🌐 Cross-Dataset / Domain Generalization Experiments](#2--cross-dataset--domain-generalization-experiments)
+    - [Step-by-Step Instructions](#step-by-step-instructions-1)
+  - [(3) 🎯 Few-Shot Learning Experiments](#3--few-shot-learning-experiments)
+    - [Step-by-Step Instructions](#step-by-step-instructions-2)
+
+---
+
+## 🖥️ GPU and Memory Requirements
+
+- All experiments are trained with a **batch size of 4** on a **single NVIDIA 4090** GPU, with the exception of ImageNet.
+- **ImageNet** experiments require approximately **30 GB** of GPU memory. For ImageNet, we recommend using a **single NVIDIA A800**.
+- We provide two implementations for projection:
+  - **SVD**-based projection
+  - **Least squares**-based projection
+  > **Tip:** Although mathematically equivalent, the least squares method is more GPU memory-efficient.
+
+---
+
+## (1) 🏆 Base-to-Novel Experiments
+
+### Step-by-Step Instructions
+
+1. 
**Configuration** + Modify the configuration file located at: + ``` + configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml + ``` + +2. **Update Dataset Path** + Change the dataset path in: + - `scripts/supr/base2new.sh` (for SuPr) + - `scripts/supr_ens/base2new.sh` (for SuPrEns) + + (Modify **line 4** to point to your local dataset directory.) + +3. **Training Commands** + Run the following command to train SuPr (repeat for seeds 1, 2, and 3): + + ```bash + # Set dataset (e.g., imagenet) + # Available datasets: [caltech101, food101, dtd, ucf101, oxford_flowers, oxford_pets, fgvc_aircraft, stanford_cars, sun397, eurosat] + + # Train SuPr + sh scripts/supr/base2new.sh imagenet + + # Train SuPr Ens + sh scripts/supr_ens/base2new.sh imagenet + ``` + +4. **Output Directory** + Results will be saved automatically at: + ``` + Base results: output/base2new/${TRAINER}/${CFG}/train_base/${DATASET}/shots_${SHOTS}/seed${SEED} + Novel results: output/base2new/${TRAINER}/${CFG}/test_new/${DATASET}/shots_${SHOTS}/seed${SEED} + ``` + +5. **Result Aggregation** + After finishing training for all seeds, run: + + ```bash + # Aggregate base-to-novel results + python parse_test_res.py -type base2new output/base2new/SuPr/vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16 + ``` + +--- + +### 🔥 SuPr + PromptSRC + +To run SuPr combined with PromptSRC: + +1. **Configuration** + Use the configuration file: + ``` + configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml + ``` + +2. **Training Command** + ```bash + # Train SuPr+PromptSRC + sh scripts/supr_src/base2new.sh imagenet + ``` + +--- + +## (2) 🌐 Cross-Dataset / Domain Generalization Experiments + +### Step-by-Step Instructions + +1. **Configuration** + Edit the configuration file at: + ``` + configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml + ``` + +2. **Update Dataset Path** + Change the dataset path in: + ``` + scripts/supr/cross_dg.sh (line 4) + ``` + +3. **Training Command** + Run the following script: + + ```bash + # This script will: + # 1. Train SuPr on ImageNet (3 seeds) + # 2. Evaluate on 10 cross-datasets + # 3. Perform DG evaluation on ImageNetV2, ImageNet-Sketch, ImageNet-A, and ImageNet-R + + sh scripts/supr/cross_dg.sh + ``` + +4. **Output Directory** + Results will be saved at: + ``` + output/cross_dg/${TRAINER}/${CFG}/${DATASET}/shots_${SHOTS}/seed${SEED} + ``` + +5. **Result Aggregation** + + ```bash + # Aggregate cross-dataset results + python parse_test_res.py -type cross output/cross_dg/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16 + + # Aggregate domain generalization results + python parse_test_res.py -type dg output/cross_dg/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16 + ``` + +--- + +## (3) 🎯 Few-Shot Learning Experiments + +### Step-by-Step Instructions + +1. **Configuration** + Edit the configuration file at: + ``` + configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml + ``` + +2. **Update Dataset Path** + Change the dataset path in: + ``` + scripts/supr/few_shot.sh (line 4) + ``` + +3. **Training Command** + ```bash + # dataset=imagenet + # Other available datasets: [caltech101, food101, dtd, ucf101, oxford_flowers, oxford_pets, fgvc_aircraft, stanford_cars, sun397, eurosat] + + sh scripts/supr/fewshot.sh imagenet + ``` + +4. **Output Directory** + Results will be saved at: + ``` + output/fewshot/${TRAINER}/${CFG}/${DATASET}/shots_${SHOTS}/seed${SEED} + ``` + +5. 
**Result Aggregation** + + ```bash + # Aggregate few-shot results + python parse_test_res.py -type fewshot output/fewshot/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4 + ``` + +--- + +> **Tip:** Always run experiments across **three random seeds** to ensure reproducibility and statistically stable results. +> +> **Warning:** Be sure to update dataset paths correctly before launching the scripts. Missing this may lead to training failures or empty outputs. + +--- diff --git a/docs/color.jpg b/docs/color.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2a1d770b9053a3c22e39823075c4620b3fa57bba Binary files /dev/null and b/docs/color.jpg differ diff --git a/docs/insight.jpg b/docs/insight.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b67d2882ebdaa95bbc085817239a464e9ddc96fa --- /dev/null +++ b/docs/insight.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89a44aac733dc7c76c9c411c2b48d392b586650999cc7bf1cd7bdacaddb0a185 +size 238057 diff --git a/docs/insight.pdf b/docs/insight.pdf new file mode 100644 index 0000000000000000000000000000000000000000..5cc0843fed5a72d2299941d5b3418ad9541e3f06 --- /dev/null +++ b/docs/insight.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:467d460259690fadc7a13bd8b6ebbd7e439575fce5a37adcfb2ca63bd001b787 +size 1221626 diff --git a/docs/style.jpg b/docs/style.jpg new file mode 100644 index 0000000000000000000000000000000000000000..154ccbc902baec4c251938d5ecf6584c41c8533c --- /dev/null +++ b/docs/style.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44dcbb4e748d66d99a12cca30be6d430c1e37e24015e065d7a5a64e50be54239 +size 121915 diff --git a/docs/texture.jpg b/docs/texture.jpg new file mode 100644 index 0000000000000000000000000000000000000000..17eecd2c58539eb642218becd7faa01608889982 --- /dev/null +++ b/docs/texture.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503424a2c9f8474b9782eb7fa328298311507821e2dfbf22d239fabed38362b5 +size 152152 diff --git a/docs/vis.jpg b/docs/vis.jpg new file mode 100644 index 0000000000000000000000000000000000000000..aee850b37595d5083fe6c4f6160dd08713d1e0bb --- /dev/null +++ b/docs/vis.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abca7288f57bd2599f12b4393f101c3a8c2a2c26d63fa97c1e26c1893abe8c59 +size 118256 diff --git a/docs/walking.jpg b/docs/walking.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1f88be351f229e83a2e26daf358e24b50f305ba7 --- /dev/null +++ b/docs/walking.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9389c268b86b93f2f7f13ec40e5f9076f5186924f31761f2da158e091598e614 +size 210227 diff --git a/output/.DS_Store b/output/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..c6f565d6ae8bcaac1c0b4557cea7c15d55db920e Binary files /dev/null and b/output/.DS_Store differ diff --git a/output/base2new/.DS_Store b/output/base2new/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..c1e1e0d77c8ed074015d0eb5da3d9963bab5596d Binary files /dev/null and b/output/base2new/.DS_Store differ diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ae0854f15e38e9cc4003d5b8a74c3762a1a86efa --- /dev/null +++ 
b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/caltech101/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + 
N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 1,534 +# val 916 +# test 916 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 
'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 
'text_encoder.transformer.resblocks.4.VPT_scale5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.transformer.resblocks.7.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/caltech101/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 916 +* correct: 861 +* accuracy: 94.0% +* error: 6.0% +* macro_f1: 94.4% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6dba3d031ae4bc275130be1752ce87f0e29201c5 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/caltech101/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + 
BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: 
True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 1,534 +# val 916 +# test 916 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'prompt_learner.ctx_space.2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 
'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 
'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'prompt_learner.ctx_space.3'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/caltech101/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 916 +* correct: 861 +* accuracy: 94.0% +* error: 6.0% +* macro_f1: 94.6% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..0cc01ff1c1b31ac5d92570129990700eae7815b7 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/caltech101/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + 
BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: 
True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 1,534 +# val 916 +# test 916 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 
'text_encoder.transformer.resblocks.7.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 
'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias4'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/caltech101/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 916 +* correct: 870 +* accuracy: 95.0% +* error: 5.0% +* macro_f1: 95.4% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..a2ef94344d377b477e32851938f1d13ca7fb31d8 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/dtd/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: 
RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + 
CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 23 +# train_x 1,380 +# val 828 +# test 828 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'prompt_learner.ctx_space.3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 
'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 
'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/dtd/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 828 +* correct: 536 +* accuracy: 64.7% +* error: 35.3% +* macro_f1: 62.9% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6e3443f16fa5a1acaca820d8e2d628b1b138da4b --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/dtd/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + 
BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: 
True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 23 +# train_x 1,380 +# val 828 +# test 828 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 
'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 
'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/dtd/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 828 +* correct: 504 +* accuracy: 60.9% +* error: 39.1% +* macro_f1: 59.7% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..05a92590749690399140b06e15c59f3a589d387b --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/dtd/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + 
N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + 
TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 23 +# train_x 1,380 +# val 828 +# test 828 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 
'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 
'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/dtd/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 828 +* correct: 525 +* accuracy: 63.4% +* error: 36.6% +* macro_f1: 62.2% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..472ad1a23d9d2f1819790d2b35ff1f00082a8427 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/eurosat/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + 
N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: 
SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 6,500 +# val 3,900 +# test 3,900 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_bias5', 'image_encoder.VPT', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx_space.2', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 
'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale2'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/eurosat/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 3,900 +* correct: 2,908 +* accuracy: 74.6% +* error: 25.4% +* macro_f1: 72.1% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..264df3424e10ef6507391be7d91ecf2e9b322b72 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/eurosat/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + 
N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: 
SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 6,500 +# val 3,900 +# test 3,900 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.VPT', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 
'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 
'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/eurosat/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 3,900 +* correct: 3,245 +* accuracy: 83.2% +* error: 16.8% +* macro_f1: 81.4% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c776d7652af7c68264399d7254366e43d908048e --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/eurosat/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + 
N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: 
SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 6,500 +# val 3,900 +# test 3,900 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'prompt_learner.ctx', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 
'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 
'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'prompt_learner.ctx_space.1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/eurosat/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 3,900 +* correct: 3,148 +* accuracy: 80.7% +* error: 19.3% +* macro_f1: 79.8% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..271da90b0b1c40109ad2f60ae15f17a9c2344953 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/fgvc_aircraft/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: 
RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: FGVCAircraft +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,667 +# test 1,667 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 
'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 
'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/fgvc_aircraft/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,667 +* correct: 648 +* accuracy: 38.9% +* error: 61.1% +* macro_f1: 34.5% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d8085faeeb9f5d68d643371f44e7caff9dffb933 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/fgvc_aircraft/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: 
RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: FGVCAircraft +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,667 +# test 1,667 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 
'text_encoder.transformer.resblocks.2.VPT_bias3', 'prompt_learner.ctx_space.3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 
'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/fgvc_aircraft/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,667 +* correct: 613 +* accuracy: 36.8% +* error: 63.2% +* macro_f1: 33.1% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e7608ab7d9b963867d1dff5fc2070a4e76a7acd --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/fgvc_aircraft/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True 
+ SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: FGVCAircraft +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,667 +# test 1,667 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'prompt_learner.ctx_space.2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 
'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 
'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale2'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/fgvc_aircraft/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,667 +* correct: 640 +* accuracy: 38.4% +* error: 61.6% +* macro_f1: 34.8% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..bf1e610cc50ee7440469a3bf5a89a998b2fddaa6 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/food101/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 
+ N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + 
TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 50 +# train_x 25,000 +# val 15,000 +# test 15,000 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 
'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 
'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/food101/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 15,000 +* correct: 13,719 +* accuracy: 91.5% +* error: 8.5% +* macro_f1: 91.4% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6fc0443e4a6bf6fb9910581535907ee0947296f2 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/food101/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + 
TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + 
SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 50 +# train_x 25,000 +# val 15,000 +# test 15,000 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 
'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 
'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/food101/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 15,000 +* correct: 13,720 +* accuracy: 91.5% +* error: 8.5% +* macro_f1: 91.4% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..34e6db7ef19dceaf9209d28bb47c78c115b55760 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/food101/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 
+ N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: 
+ CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 50 +# train_x 25,000 +# val 15,000 +# test 15,000 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 
'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 
'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale4'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/food101/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 15,000 +* correct: 13,797 +* accuracy: 92.0% +* error: 8.0% +* macro_f1: 92.0% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6c822293d2e9d3ddbc150fd9bf8b9b35a38ac3cc --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/imagenet/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: 
RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + 
SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 638,834 +# val 25,000 +# test 25,000 +--------- -------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 
'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 
'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 17,509 +* accuracy: 70.0% +* error: 30.0% +* macro_f1: 69.2% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..825509451fa488a24b645067a100e5b3a778cfbd --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/imagenet/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: 
RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + 
SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 638,834 +# val 25,000 +# test 25,000 +--------- -------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'prompt_learner.ctx', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 
'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'prompt_learner.ctx_space.2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 
'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 17,591 +* accuracy: 70.4% +* error: 29.6% +* macro_f1: 69.5% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..17e029bb3fc34c0a0f6df56a93fa7e3a7e8f4f5a --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/imagenet/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + 
N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: 
+ CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 638,834 +# val 25,000 +# test 25,000 +--------- -------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 
'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 
'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 17,372 +* accuracy: 69.5% +* error: 30.5% +* macro_f1: 68.7% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..613c6f2075b9e108823685c33707ea7db26001b5 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/oxford_flowers/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + 
SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 2,349 +# val 1,410 +# test 1,410 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 
'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'image_encoder.VPT', 
'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale2'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/oxford_flowers/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,410 +* correct: 1,070 +* accuracy: 75.9% +* error: 24.1% +* macro_f1: 70.4% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d5cd089993c911f253a58f41815413b76d9fa257 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/oxford_flowers/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + 
TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + 
SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 2,349 +# val 1,410 +# test 1,410 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 
'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 
'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias3'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/oxford_flowers/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,410 +* correct: 1,070 +* accuracy: 75.9% +* error: 24.1% +* macro_f1: 71.1% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b1048154ea8e363c9620d48b2fd9b455dfd48338 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/oxford_flowers/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + 
BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + 
SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 2,349 +# val 1,410 +# test 1,410 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 
'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.1', 'prompt_learner.ctx_space.3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'prompt_learner.ctx_space.2', 
'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'image_encoder.VPT', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/oxford_flowers/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,410 +* correct: 1,052 +* accuracy: 74.6% +* error: 25.4% +* macro_f1: 70.2% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..12f68082b037efb923f1de31b58b1654c2dcd22c --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/oxford_pets/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: 
RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 
60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 18 +# train_x 1,436 +# val 1,788 +# test 1,788 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 
'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 
'text_encoder.transformer.resblocks.4.VPT_bias5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/oxford_pets/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,788 +* correct: 1,751 +* accuracy: 97.9% +* error: 2.1% +* macro_f1: 98.0% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..16ad9e709e3b514bb3c2edbc47b01d6853733f45 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/oxford_pets/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + 
SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + 
CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 18 +# train_x 1,436 +# val 1,788 +# test 1,788 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 
'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 
'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'image_encoder.VPT', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/oxford_pets/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,788 +* correct: 1,722 +* accuracy: 96.3% +* error: 3.7% +* macro_f1: 96.3% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..81ae43cff1820f108b179aa2888bb6cd3c5fd92b --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/oxford_pets/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + 
BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: 
True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 18 +# train_x 1,436 +# val 1,788 +# test 1,788 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'prompt_learner.ctx_space.3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias6', 
'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 
'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'prompt_learner.ctx_space.4', 'image_encoder.transformer.resblocks.6.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/oxford_pets/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,788 +* correct: 1,749 +* accuracy: 97.8% +* error: 2.2% +* macro_f1: 97.8% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..01921535c73a4136711071964c12b35b6b4e1304 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/stanford_cars/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + 
SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,269 +# val 4,039 +# test 4,039 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 
'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 
'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/stanford_cars/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,039 +* correct: 3,014 +* accuracy: 74.6% +* error: 25.4% +* macro_f1: 73.0% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..881a24a33e1e87cafbd0d6e08d609686266953db --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/stanford_cars/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + 
SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,269 +# val 4,039 +# test 4,039 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx_space.3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 
'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 
'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/stanford_cars/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,039 +* correct: 2,989 +* accuracy: 74.0% +* error: 26.0% +* macro_f1: 72.7% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ea25d91d07ce0447f37cf578c5d67ec2c2328fa0 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/stanford_cars/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: 
RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,269 +# val 4,039 +# test 4,039 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias5', 
'text_encoder.transformer.resblocks.7.VPT_scale4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 
'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/stanford_cars/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,039 +* correct: 2,995 +* accuracy: 74.2% +* error: 25.8% +* macro_f1: 72.9% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ebfa13560cf12c8a53efd4964e2fb08ef95e00ee --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/sun397/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + 
N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: 
SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 198 +# train_x 7,920 +# val 9,900 +# test 9,900 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 
'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 
'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'prompt_learner.ctx_space.6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/sun397/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 9,900 +* correct: 7,761 +* accuracy: 78.4% +* error: 21.6% +* macro_f1: 77.6% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..aff29157e449f88d33818c6c46621ad06ab78773 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/sun397/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + 
SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + 
CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 198 +# train_x 7,920 +# val 9,900 +# test 9,900 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 
'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 
'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/sun397/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 9,900 +* correct: 7,773 +* accuracy: 78.5% +* error: 21.5% +* macro_f1: 77.6% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8c0c37478f3e5bcc0b56097d29a7bfac84d71265 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/sun397/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + 
N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: 
SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 198 +# train_x 7,920 +# val 9,900 +# test 9,900 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 
'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 
'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/sun397/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 9,900 +* correct: 7,795 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 77.8% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..179940b51811d12d1c5c05a43b8fc863837d599c --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/ucf101/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 
+ N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + 
CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 50 +# train_x 3,713 +# val 1,849 +# test 1,849 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 
'text_encoder.transformer.resblocks.6.VPT_scale0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 
'text_encoder.transformer.resblocks.3.VPT_scale0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/ucf101/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,849 +* correct: 1,471 +* accuracy: 79.6% +* error: 20.4% +* macro_f1: 77.6% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2e88f44fa678cd8509c6da5a87af2bfc34a631d9 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/ucf101/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 
0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: 
+ CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 50 +# train_x 3,713 +# val 1,849 +# test 1,849 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 
'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 
'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/ucf101/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,849 +* correct: 1,480 +* accuracy: 80.0% +* error: 20.0% +* macro_f1: 77.9% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e3ebe51e2b34c91190d349880fcb80f10e8e143e --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/ucf101/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + 
TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: 
True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 50 +# train_x 3,713 +# val 1,849 +# test 1,849 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 
'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias1', 
'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/ucf101/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,849 +* correct: 1,453 +* accuracy: 78.6% +* error: 21.4% +* macro_f1: 76.0% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d026768a97ad0e4329f730ccdf6dd688cc746b2a --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/caltech101/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + 
BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: 
True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 2,594 +# val 1,549 +# test 1,549 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_bias5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 
'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'prompt_learner.ctx_space.4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 
'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/caltech101/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,549 +* correct: 1,522 +* accuracy: 98.3% +* error: 1.7% +* macro_f1: 96.6% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2eea292affcfb67533f64559b1e91db916fed940 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/caltech101/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + 
TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + 
SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 2,594 +# val 1,549 +# test 1,549 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 
'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 
'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/caltech101/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,549 +* correct: 1,526 +* accuracy: 98.5% +* error: 1.5% +* macro_f1: 97.1% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d23c481f5862526dd19cab860410a43d1a6d7e1e --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/caltech101/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + 
SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 2,594 +# val 1,549 +# test 1,549 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 
'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 
'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'prompt_learner.ctx_space.0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/caltech101/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,549 +* correct: 1,521 +* accuracy: 98.2% +* error: 1.8% +* macro_f1: 96.5% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..391d769192de017c0c1dc2044b0f5fe651448b05 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/dtd/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + 
N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: 
SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 24 +# train_x 1,440 +# val 864 +# test 864 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 
'text_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 
'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale2'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/dtd/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 864 +* correct: 711 +* accuracy: 82.3% +* error: 17.7% +* macro_f1: 82.1% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..62f3fefa48abfb80adf914e13a465689974c3ce5 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/dtd/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + 
N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + 
TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 24 +# train_x 1,440 +# val 864 +# test 864 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 
'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 
'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/dtd/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 864 +* correct: 731 +* accuracy: 84.6% +* error: 15.4% +* macro_f1: 84.6% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..84a949db8661a434b665540311b6895fb7e3135d --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/dtd/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + 
N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + 
TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 24 +# train_x 1,440 +# val 864 +# test 864 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 
'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 
'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale4'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/dtd/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 864 +* correct: 714 +* accuracy: 82.6% +* error: 17.4% +* macro_f1: 82.1% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..08712fdd2258e4975e032d63af3b6771091273f3 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/eurosat/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + 
TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + 
SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 7,000 +# val 4,200 +# test 4,200 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_bias3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 
'text_encoder.transformer.resblocks.7.VPT_bias0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 
'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/eurosat/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,200 +* correct: 4,020 +* accuracy: 95.7% +* error: 4.3% +* macro_f1: 95.8% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9ac3624960c3be83cd6bb5659f9ddcc3821fa82c --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/eurosat/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 
0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + 
TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 7,000 +# val 4,200 +# test 4,200 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'prompt_learner.ctx_space.6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.VPT', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'prompt_learner.ctx', 
'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 
'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/eurosat/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,200 +* correct: 4,033 +* accuracy: 96.0% +* error: 4.0% +* macro_f1: 96.1% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d9058f45ca79dedbd8dac84391bcda4c7c198259 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/eurosat/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler 
+ TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 
+ SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 7,000 +# val 4,200 +# test 4,200 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 
'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 
'prompt_learner.ctx_space.5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx_space.4'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/eurosat/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,200 +* correct: 4,040 +* accuracy: 96.2% +* error: 3.8% +* macro_f1: 96.2% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ce9ddcbe9cd2515997a75aa84d8cea7984414dd --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/fgvc_aircraft/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler 
+ TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + 
SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: FGVCAircraft +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,666 +# test 1,666 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 
'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 
'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'prompt_learner.ctx_space.4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.5'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/fgvc_aircraft/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,666 +* correct: 685 +* accuracy: 41.1% +* error: 58.9% +* macro_f1: 38.8% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7b9b094ed92df56a6b231e8aa2d7f096641a6e9a --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/fgvc_aircraft/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + 
SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: FGVCAircraft +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,666 +# test 1,666 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 
'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 
'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias3'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/fgvc_aircraft/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,666 +* correct: 685 +* accuracy: 41.1% +* error: 58.9% +* macro_f1: 39.3% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..66bf623297dbb4ec5072fcc2c2c75b1ec5abcaa4 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/fgvc_aircraft/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + 
BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + 
SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: FGVCAircraft +SUBSAMPLE BASE CLASSES! 
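[Editor's note] For readers reconstructing the optimisation setup from the OPTIM block above (SGD, LR 0.0025, cosine schedule, MAX_EPOCH 10, one epoch of constant warmup at 1e-5), here is a hedged sketch of the learning-rate curve it implies; the exact scheduler in Dassl may differ in details such as whether the cosine horizon excludes the warmup epoch:

```python
import math

def lr_at_epoch(epoch, base_lr=0.0025, max_epoch=10,
                warmup_epoch=1, warmup_cons_lr=1e-5):
    """Constant warmup for the first epoch, then cosine annealing toward 0."""
    if epoch < warmup_epoch:
        return warmup_cons_lr
    progress = epoch / max_epoch
    return 0.5 * base_lr * (1.0 + math.cos(math.pi * progress))
```

With these values the rate is 1e-5 during epoch 0, roughly 0.0024 at epoch 1, and decays toward 0 by epoch 10.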
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,666 +# test 1,666 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 
'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'prompt_learner.ctx_space.2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 
'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/fgvc_aircraft/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,666 +* correct: 659 +* accuracy: 39.6% +* error: 60.4% +* macro_f1: 36.9% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..55f6ff08f4b282685a43a26f5da89938dc8f93c0 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/food101/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 
4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + 
TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE BASE CLASSES! 
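[Editor's note] The INPUT block above (SIZE 224, bicubic interpolation, CLIP pixel mean/std) together with the `Building transform_test` lines printed below corresponds to a standard CLIP preprocessing pipeline. A minimal torchvision sketch of that test-time pipeline, with values taken from the config; the variable names are ours, not the repository's:

```python
from torchvision import transforms as T

CLIP_MEAN = (0.48145466, 0.4578275, 0.40821073)
CLIP_STD = (0.26862954, 0.26130258, 0.27577711)

# resize the smaller edge to 224, center-crop, convert to [0, 1], normalize
transform_test = T.Compose([
    T.Resize(224, interpolation=T.InterpolationMode.BICUBIC),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(CLIP_MEAN, CLIP_STD),
])
```

The training pipeline logged below differs only in replacing the resize/crop with a random resized crop (scale 0.08-1.0) plus a random horizontal flip before normalisation.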
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 51 +# train_x 25,500 +# val 15,300 +# test 15,300 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx_space.2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 
'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 
'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/food101/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 15,300 +* correct: 13,819 +* accuracy: 90.3% +* error: 9.7% +* macro_f1: 90.3% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..bfbaeeb824fa8a1350d7154be5765c290fd729d3 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/food101/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + 
N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + 
TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 51 +# train_x 25,500 +# val 15,300 +# test 15,300 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 
'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 
'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'prompt_learner.ctx', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/food101/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 15,300 +* correct: 13,874 +* accuracy: 90.7% +* error: 9.3% +* macro_f1: 90.7% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8cb6f27d2956f37cd250305abea9ecb515edc8e5 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/food101/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler 
+ TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 
+ SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 51 +# train_x 25,500 +# val 15,300 +# test 15,300 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.2', 
'prompt_learner.ctx_space.4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 
'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/food101/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 15,300 +* correct: 13,879 +* accuracy: 90.7% +* error: 9.3% +* macro_f1: 90.7% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..a642418c1d5d4baa2ddeef570ff6389ee1d7f0f5 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/imagenet/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 
+ N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + 
TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 642,289 +# val 25,000 +# test 25,000 +--------- -------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 
'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 
'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'image_encoder.VPT', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'image_encoder.transformer.resblocks.1.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 19,499 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.7% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..a9d07bb1df477958d95ede852c58f041feecf8e9 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/imagenet/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + 
BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True 
+ TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 642,289 +# val 25,000 +# test 25,000 +--------- -------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'prompt_learner.ctx_space.3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'prompt_learner.ctx_space.2', 
'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 
'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias2'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 19,525 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.8% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..65b1999a0f1aa05ba3368a1f19fbce891055ac34 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/imagenet/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + 
BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True 
+ TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 642,289 +# val 25,000 +# test 25,000 +--------- -------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 
'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 
'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'prompt_learner.ctx_space.4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale5'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 19,535 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.9% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8df818797debb9866e4f1cabf29b2630302fd7b1 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/oxford_flowers/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + 
TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + 
SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 1,744 +# val 1,053 +# test 1,053 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale3', 
'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 
'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale5'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/oxford_flowers/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,053 +* correct: 1,026 +* accuracy: 97.4% +* error: 2.6% +* macro_f1: 97.6% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c33b8ece3b21437d2f629e801f0ca9c065d4e98a --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/oxford_flowers/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + 
SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 1,744 +# val 1,053 +# test 1,053 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 
'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'prompt_learner.ctx_space.1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 
'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/oxford_flowers/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,053 +* correct: 1,025 +* accuracy: 97.3% +* error: 2.7% +* macro_f1: 97.3% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6f806799ad285e34002d6ef57f16b72dffd717f6 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/oxford_flowers/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + 
SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 1,744 +# val 1,053 +# test 1,053 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_bias5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'image_encoder.transformer.resblocks.3.VPT_shallow', 
'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 
'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale4'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/oxford_flowers/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,053 +* correct: 1,034 +* accuracy: 98.2% +* error: 1.8% +* macro_f1: 98.1% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..be6fb99d2d4ed831130f8b6fc0db6747e4954f5b --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/oxford_pets/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + 
BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: 
True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 19 +# train_x 1,508 +# val 1,881 +# test 1,881 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 
'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 
'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale5'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/oxford_pets/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,881 +* correct: 1,795 +* accuracy: 95.4% +* error: 4.6% +* macro_f1: 95.4% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..909f75a2bc2334007294ab7b3b161d96ba361e6c --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/oxford_pets/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: 
RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 19 +# train_x 1,508 +# val 1,881 +# test 1,881 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 
'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 
'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale5'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/oxford_pets/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,881 +* correct: 1,800 +* accuracy: 95.7% +* error: 4.3% +* macro_f1: 95.7% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..48660aad5354b0714f1bb68159d3601207bb18a9 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/oxford_pets/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: 
RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 19 +# train_x 1,508 +# val 1,881 +# test 1,881 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 
'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 
'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/oxford_pets/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,881 +* correct: 1,795 +* accuracy: 95.4% +* error: 4.6% +* macro_f1: 95.4% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b28c999419444280b4e0291bc6fa04eae14cd938 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/stanford_cars/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: 
RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + 
REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,240 +# val 4,002 +# test 4,002 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 
'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx_space.1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 
'text_encoder.transformer.resblocks.7.VPT_scale4', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale5'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/stanford_cars/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,002 +* correct: 3,099 +* accuracy: 77.4% +* error: 22.6% +* macro_f1: 76.9% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9fa9aa5f3f02089a55b2c082a8cf602f8a8d9ec4 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/stanford_cars/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + 
BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + 
SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,240 +# val 4,002 +# test 4,002 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale2', 
'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 
'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale3'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/stanford_cars/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,002 +* correct: 3,080 +* accuracy: 77.0% +* error: 23.0% +* macro_f1: 76.4% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..83a8079704dff8ba44226d83b38f176aae174ae4 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/stanford_cars/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 
16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + 
PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,240 +# val 4,002 +# test 4,002 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_scale0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 
'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'prompt_learner.ctx_space.2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'prompt_learner.ctx_space.4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 
'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/stanford_cars/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,002 +* correct: 3,102 +* accuracy: 77.5% +* error: 22.5% +* macro_f1: 76.9% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..738dc215399030b41064fb3b2e093dce7821f253 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/sun397/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + 
N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: 
SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 199 +# train_x 7,960 +# val 9,950 +# test 9,950 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 
'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.2', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 
'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/sun397/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 9,950 +* correct: 8,200 +* accuracy: 82.4% +* error: 17.6% +* macro_f1: 82.2% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..230637f8e66bb863dc740ca3bf38b9a348c7ecf1 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/sun397/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: 
RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + 
SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 199 +# train_x 7,960 +# val 9,950 +# test 9,950 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 
'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 
'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/sun397/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 9,950 +* correct: 8,214 +* accuracy: 82.6% +* error: 17.4% +* macro_f1: 82.4% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8353cbb3108de4027a23a6e9d947cb0291e1588b --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/sun397/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 
+ SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + 
CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 199 +# train_x 7,960 +# val 9,950 +# test 9,950 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 
'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'prompt_learner.ctx_space.4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'prompt_learner.ctx_space.3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 
'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_bias2'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/sun397/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 9,950 +* correct: 8,155 +* accuracy: 82.0% +* error: 18.0% +* macro_f1: 81.8% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed1/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8226ae22a328f99d23b7ca32302f2881bf32e96e --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed1/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/ucf101/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + 
N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: SuPr + TCP: + 
CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 51 +# train_x 3,926 +# val 1,934 +# test 1,934 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 
'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 
'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/ucf101/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,934 +* correct: 1,679 +* accuracy: 86.8% +* error: 13.2% +* macro_f1: 86.1% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed2/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..cd608a662088a3e7935d628a9c85f1286d1bb43d --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed2/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/ucf101/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + 
N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: 
SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 51 +# train_x 3,926 +# val 1,934 +# test 1,934 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 
'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 
'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/ucf101/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,934 +* correct: 1,678 +* accuracy: 86.8% +* error: 13.2% +* macro_f1: 86.0% diff --git a/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed3/log.txt b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..94b90924bf01e9f1dfed361e2aed211f8d04a0b4 --- /dev/null +++ b/output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed3/log.txt @@ -0,0 +1,341 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/ucf101/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + 
N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPr/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: True + TRAINER_BACKBONE: 
SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 51 +# train_x 3,926 +# val 1,934 +# test 1,934 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx', 
'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 
'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/base2new/ucf101/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,934 +* correct: 1,635 +* accuracy: 84.5% +* error: 15.5% +* macro_f1: 83.7% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d76d537ff9d444b7238e19043a2dabcead54e5f4 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/caltech101/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + 
SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + 
PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... +** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 1,534 +# val 916 +# test 916 +--------- ---------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 
'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 
'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 
'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/caltech101/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 916 +* correct: 860 +* accuracy: 93.9% +* error: 6.1% +* macro_f1: 94.3% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..0ad301ac4ebe69a57ab7f79935884296b7d9181a --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/caltech101/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + 
ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 1,534 +# val 916 +# test 916 +--------- ---------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 
'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 
'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 
'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/caltech101/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 916 +* correct: 865 +* accuracy: 94.4% +* error: 5.6% +* macro_f1: 94.9% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..bdb5c9b4f5573ae10c211c5733d82313e2ab14e4 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/caltech101/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + 
HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/caltech101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info **
+PyTorch version: 1.10.2+cu113
+Is debug build: False
+CUDA used to build PyTorch: 11.3
+ROCM used to build PyTorch: N/A
+
+OS: Ubuntu 22.04.5 LTS (x86_64)
+GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0
+Clang version: Could not collect
+CMake version: Could not collect
+Libc version: glibc-2.35
+
+Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime)
+Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10
+Is CUDA available: True
+CUDA runtime version: 11.5.119
+GPU models and configuration: 
+GPU 0: NVIDIA GeForce RTX 4090
+GPU 1: NVIDIA GeForce RTX 4090
+GPU 2: NVIDIA GeForce RTX 4090
+GPU 3: NVIDIA GeForce RTX 4090
+GPU 4: NVIDIA GeForce RTX 4090
+GPU 5: NVIDIA GeForce RTX 4090
+GPU 6: NVIDIA GeForce RTX 4090
+GPU 7: NVIDIA GeForce RTX 4090
+
+Nvidia driver version: 550.90.07
+cuDNN version: Could not collect
+HIP runtime version: N/A
+MIOpen runtime version: N/A
+
+Versions of relevant libraries:
+[pip3] numpy==1.24.3
+[pip3] open-clip-torch==2.20.0
+[pip3] torch==1.10.2+cu113
+[pip3] torchaudio==0.10.2+cu113
+[pip3] torchvision==0.11.3+cu113
+[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free
+[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge
+[conda] ffmpeg 4.3 hf484d3e_0 pytorch
+[conda] mkl 2023.1.0 h6d00ec8_46342
+[conda] mkl-service 2.4.0 py38h5eee18b_1
+[conda] mkl_fft 1.3.6 py38h417a72b_1
+[conda] mkl_random 1.2.2 py38h417a72b_1
+[conda] numpy 1.24.3 py38hf6e8229_1
+[conda] numpy-base 1.24.3 py38h060ed82_1
+[conda] open-clip-torch 2.20.0 pypi_0 pypi
+[conda] pytorch-mutex 1.0 cpu pytorch
+[conda] torch 1.10.2+cu113 pypi_0 pypi
+[conda] torchaudio 0.10.2+cu113 pypi_0 pypi
+[conda] torchvision 0.11.3+cu113 pypi_0 pypi
+ Pillow (9.4.0)
+
+Loading trainer: SuPrEns
+Loading dataset: Caltech101
+Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json
+SUBSAMPLE NEW CLASSES!
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 1,534 +# val 916 +# test 916 +--------- ---------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.4', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 
'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/caltech101/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 916 +* correct: 864 +* accuracy: 94.3% +* error: 5.7% +* macro_f1: 94.6% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..68c39b22f0bbe78c4e9d5b00b7f734d4b27fa8da --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/dtd/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu 
+ BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 23 +# train_x 1,380 +# val 828 +# test 828 +--------- ------------------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/dtd/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 828 +* correct: 537 +* accuracy: 64.9% +* error: 35.1% +* macro_f1: 63.6% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..365bab5591f0f4fb416343ffa41d21ee88712957 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/dtd/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + 
ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
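The config dump above is the merge of the YAML file named in `config_file` with the `opts` overrides listed under Arguments. A minimal sketch, assuming a yacs-style `CfgNode` with hypothetical defaults (the repo's real default config may differ), of how `opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new']` ends up as `SVD: False` and `SUBSAMPLE_CLASSES: new` in the printed config:

```python
# Hypothetical defaults; only the two keys overridden by `opts` are modelled here.
from yacs.config import CfgNode as CN

_C = CN()
_C.DATASET = CN()
_C.DATASET.SUBSAMPLE_CLASSES = "all"   # assumed default
_C.TRAINER = CN()
_C.TRAINER.SUPR = CN()
_C.TRAINER.SUPR.SVD = True             # assumed default

cfg = _C.clone()
# cfg.merge_from_file("configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml")
cfg.merge_from_list(["TRAINER.SUPR.SVD", "False",
                     "DATASET.SUBSAMPLE_CLASSES", "new"])
cfg.freeze()
print(cfg.TRAINER.SUPR.SVD)           # -> False
print(cfg.DATASET.SUBSAMPLE_CLASSES)  # -> new
```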
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE NEW CLASSES! 
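`SUBSAMPLE NEW CLASSES!` refers to the base-to-new split selected by `DATASET.SUBSAMPLE_CLASSES: new`. A minimal sketch of the usual protocol (assumed here, not copied from this repo's dataset code): class names are taken in a fixed order, the first half forms the base split and the remainder the new split, which for DTD's 47 categories leaves the 23 classes reported in the dataset summary that follows.

```python
import math

def subsample_classes(classnames, subsample="all"):
    """Split a fixed-order class list into 'base' (first half) or 'new' (rest)."""
    if subsample == "all":
        return classnames
    m = math.ceil(len(classnames) / 2)
    return classnames[:m] if subsample == "base" else classnames[m:]

dtd_classes = [f"texture_{i:02d}" for i in range(47)]   # placeholder names
print(len(subsample_classes(dtd_classes, "base")))      # 24
print(len(subsample_classes(dtd_classes, "new")))       # 23
```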
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 23 +# train_x 1,380 +# val 828 +# test 828 +--------- ------------------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 
'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/dtd/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 828 +* correct: 532 +* accuracy: 64.3% +* error: 35.7% +* macro_f1: 63.2% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..cda02f0e439a510b0b50b80de900d1d9a29a60bd --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/dtd/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: 
+OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/dtd/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
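The OPTIM block above fully determines the optimization schedule used in these runs: SGD with momentum 0.9 and weight decay 5e-4 at a base LR of 0.0025, one epoch of constant warmup at 1e-5, then cosine decay over the 10-epoch budget. The snippet below is a minimal sketch of that schedule in plain PyTorch; it is not the Dassl/SuPr implementation, and the helper name `build_optim` is ours.

```
# Minimal sketch (plain PyTorch, not the Dassl optimizer builder) of the
# schedule described by the OPTIM block: SGD + constant warmup + cosine decay.
import math
import torch

def build_optim(params, lr=0.0025, max_epoch=10, warmup_epoch=1, warmup_lr=1e-5):
    optimizer = torch.optim.SGD(
        params, lr=lr, momentum=0.9, dampening=0, nesterov=False,
        weight_decay=5e-4,
    )

    def lr_lambda(epoch):
        # Constant warmup at warmup_lr for the first epoch(s)...
        if epoch < warmup_epoch:
            return warmup_lr / lr
        # ...then cosine annealing over the remaining epochs.
        progress = (epoch - warmup_epoch) / max(1, max_epoch - warmup_epoch)
        return 0.5 * (1.0 + math.cos(math.pi * progress))

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
    return optimizer, scheduler
```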
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE NEW CLASSES! 
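The "SUBSAMPLE NEW CLASSES!" message corresponds to `DATASET.SUBSAMPLE_CLASSES: new` in the arguments: in the base-to-new protocol the label set is split in half and only the held-out "new" classes are evaluated here. A rough sketch of how such a split is commonly derived is given below (hypothetical helper, not the dataset code in this repository): for DTD's 47 classes it yields 24 base and 23 new classes, consistent with the 23-class summary printed further down.

```
# Hypothetical sketch of a base/new class split for base-to-new evaluation:
# sort the label set, keep the first ceil(n/2) classes as "base" and the
# rest as "new", then relabel the kept classes to a contiguous range.
import math

def subsample_classes(items, subsample="new"):
    """items: iterable of (impath, label, classname) records."""
    labels = sorted({label for _, label, _ in items})
    m = math.ceil(len(labels) / 2)
    selected = set(labels[:m]) if subsample == "base" else set(labels[m:])
    relabel = {y: i for i, y in enumerate(sorted(selected))}
    return [(p, relabel[y], c) for p, y, c in items if y in selected]
```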
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 23 +# train_x 1,380 +# val 828 +# test 828 +--------- ------------------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 
'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 
'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/dtd/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 828 +* correct: 528 +* accuracy: 63.8% +* error: 36.2% +* macro_f1: 62.9% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c0ce4027989ee5c3b7f029a3b6282c7f1ea62f93 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/eurosat/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + 
INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
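Each log ends with a small result block from the Classification evaluator (total, correct, accuracy, error, macro_f1), e.g. 528/828 ≈ 63.8% for DTD seed 3 above. The sketch below shows how those summary numbers can be reproduced from raw predictions; it assumes scikit-learn is available and is not the Dassl evaluator itself.

```
# Sketch (assumes scikit-learn; not the Dassl Classification evaluator) of
# the per-log summary: accuracy = correct/total, error = 100 - accuracy,
# and macro F1 averaged over classes.
import numpy as np
from sklearn.metrics import f1_score

def summarize(y_true, y_pred):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    total = int(y_true.size)
    correct = int((y_true == y_pred).sum())
    acc = 100.0 * correct / total
    return {
        "total": total,
        "correct": correct,
        "accuracy": round(acc, 1),
        "error": round(100.0 - acc, 1),
        "macro_f1": round(100.0 * f1_score(y_true, y_pred, average="macro"), 1),
    }
```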
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE NEW CLASSES! 
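The DTD logs above print "Turning off gradients in both the image and the text encoder" followed by the set of parameters left trainable, which contains only prompt-related tensors (the prompt learner's context vectors and the per-block VPT scale/bias/shallow tokens). The snippet below is a rough sketch of that freezing pattern under our own assumptions (the helper name and keyword filter are ours, not the SuPrEns trainer's code).

```
# Rough sketch of freezing the CLIP backbone while keeping only
# prompt-related parameters trainable, mirroring the "Parameters to be
# updated" set printed in the logs above. Keyword filter is an assumption.
def freeze_backbone(model, trainable_keywords=("prompt_learner", "VPT")):
    enabled = set()
    for name, param in model.named_parameters():
        keep = any(k in name for k in trainable_keywords)
        param.requires_grad_(keep)
        if keep:
            enabled.add(name)
    return enabled  # e.g. logged as "Parameters to be updated: {...}"
```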
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 6,500 +# val 3,900 +# test 3,900 +--------- ------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/eurosat/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 3,900 +* correct: 2,868 +* accuracy: 73.5% +* error: 26.5% +* macro_f1: 72.1% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..43ad46785f0c3f33d49edc1e6fbdf7e5d28acc33 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/eurosat/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + 
HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
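(The "** Config **" dump above is a resolved yacs configuration: the dataset and trainer YAML files named in the Arguments block are merged into a default config, then the command-line `opts` list is applied on top. A minimal sketch of that override step, assuming the standard `yacs` API; the default node below is a tiny hypothetical stand-in for the project's full default config:)

```python
# Minimal sketch of applying the `opts` overrides shown in the Arguments block.
# The defaults here are stand-ins; in the real run the YAML files under
# configs/datasets/ and configs/trainers/ are merged first via merge_from_file.
from yacs.config import CfgNode as CN

cfg = CN()
cfg.DATASET = CN()
cfg.DATASET.SUBSAMPLE_CLASSES = "all"   # stand-in default
cfg.TRAINER = CN()
cfg.TRAINER.SUPR = CN()
cfg.TRAINER.SUPR.SVD = True             # stand-in default

# Exactly the opts listed above: ['TRAINER.SUPR.SVD', 'False',
#                                 'DATASET.SUBSAMPLE_CLASSES', 'new']
cfg.merge_from_list(["TRAINER.SUPR.SVD", "False",
                     "DATASET.SUBSAMPLE_CLASSES", "new"])
cfg.freeze()
print(cfg.TRAINER.SUPR.SVD, cfg.DATASET.SUBSAMPLE_CLASSES)  # -> False new
```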
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE NEW CLASSES! 
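(The "SUBSAMPLE NEW CLASSES!" message above refers to the base-to-new evaluation protocol: the dataset's classes are split into a "base" half used for prompt tuning and a "new" half held out for this test, which is why only 5 of EuroSAT's 10 classes appear in the dataset summary below. A minimal sketch of such a split; the exact ordering and relabeling logic in this repository may differ:)

```python
# Minimal, illustrative sketch of CoOp-style base/new class subsampling.
# Not the repository's implementation; class ordering is an assumption.
def subsample_classes(items, subsample="new"):
    """items: list of (image_path, label, classname) tuples."""
    labels = sorted({label for _, label, _ in items})
    half = len(labels) // 2
    keep = set(labels[:half]) if subsample == "base" else set(labels[half:])
    # Relabel kept classes to 0..len(keep)-1 so the classifier head matches.
    relabel = {y: i for i, y in enumerate(sorted(keep))}
    return [(path, relabel[y], name) for path, y, name in items if y in keep]
```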
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 6,500 +# val 3,900 +# test 3,900 +--------- ------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 
'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 
'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 
'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/eurosat/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 3,900 +* correct: 3,070 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 77.1% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a9efc222f672b6b999008cce9b76c9476cd537b --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/eurosat/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 
0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/eurosat/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 6,500 +# val 3,900 +# test 3,900 +--------- ------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 
'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.3', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.4', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/eurosat/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 3,900 +* correct: 3,333 +* accuracy: 85.5% +* error: 14.5% +* macro_f1: 84.3% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..238de6accd164e0fb9e77a3a7cb139e5423aa29a --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed1/log.txt @@ -0,0 +1,353 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/fgvc_aircraft/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + 
INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: FGVCAircraft +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,667 +# test 1,667 +--------- ------------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.3', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/fgvc_aircraft/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,667 +* correct: 664 +* accuracy: 39.8% +* error: 60.2% +* macro_f1: 35.3% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..1ec328f3e9fd4cafc1bfd615ff670dca89b8cef0 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed2/log.txt @@ -0,0 +1,353 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/fgvc_aircraft/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + 
NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: FGVCAircraft +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,667 +# test 1,667 +--------- ------------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 
'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/fgvc_aircraft/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,667 +* correct: 658 +* accuracy: 39.5% +* error: 60.5% +* macro_f1: 35.8% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..00d86ee18b545bd23a878566059e27b18345b002 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed3/log.txt @@ -0,0 +1,353 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/fgvc_aircraft/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + 
ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/fgvc_aircraft/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: FGVCAircraft +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,667 +# test 1,667 +--------- ------------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.2', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/fgvc_aircraft/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,667 +* correct: 623 +* accuracy: 37.4% +* error: 62.6% +* macro_f1: 34.8% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..fc45e0b7a5878cfc7af1a250577d905b8308df5e --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/food101/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: 
+ INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 50 +# train_x 25,000 +# val 15,000 +# test 15,000 +--------- ------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 
'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/food101/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 15,000 +* correct: 13,775 +* accuracy: 91.8% +* error: 8.2% +* macro_f1: 91.8% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..054614cc3a615551f8c5bb126f875dc2c72b55c3 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/food101/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + 
ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 50 +# train_x 25,000 +# val 15,000 +# test 15,000 +--------- ------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/food101/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 15,000 +* correct: 13,813 +* accuracy: 92.1% +* error: 7.9% +* macro_f1: 92.1% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c1ad56ed4a34b3097607b81078e514d07f629313 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/food101/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: 
+ INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/food101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 50 +# train_x 25,000 +# val 15,000 +# test 15,000 +--------- ------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/food101/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 15,000 +* correct: 13,776 +* accuracy: 91.8% +* error: 8.2% +* macro_f1: 91.8% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8759f46aec337da4ebbd01573fecaf8fcbd8ab00 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + 
HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 638,834 +# val 25,000 +# test 25,000 +--------- -------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 
'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 
'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 17,791 +* accuracy: 71.2% +* error: 28.8% +* macro_f1: 70.3% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b88f1f8ffe1314e313ea55021df6ece0c909188f --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu 
+ BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 638,834 +# val 25,000 +# test 25,000 +--------- -------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 17,835 +* accuracy: 71.3% +* error: 28.7% +* macro_f1: 70.6% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2909952f8a5e1434164f43c56310dc7afb2d2a02 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: 
+ ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/imagenet/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 638,834 +# val 25,000 +# test 25,000 +--------- -------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 17,722 +* accuracy: 70.9% +* error: 29.1% +* macro_f1: 70.1% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b2d27b7b80d822af1144496e660ecde0e2ec4f24 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/oxford_flowers/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + 
HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
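The configuration dump above is a yacs-style config in which the command-line `opts` list (`['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new']`) is merged on top of the YAML defaults. Below is a minimal sketch of that merge step, assuming only the standard yacs API and a handful of the keys shown above; it is illustrative and not the repository's own setup code.

```
# Hedged sketch: merging command-line opts into a yacs config, as the
# "opts: [...]" entry in the arguments block above implies.
from yacs.config import CfgNode as CN

cfg = CN()
cfg.DATASET = CN()
cfg.DATASET.SUBSAMPLE_CLASSES = "all"   # default; overridden to "new" below
cfg.TRAINER = CN()
cfg.TRAINER.SUPR = CN()
cfg.TRAINER.SUPR.SVD = True             # default; overridden to False below
cfg.TRAINER.SUPR.ENSEMBLE_NUM = 3

# merge_from_list consumes alternating KEY, VALUE pairs -- the exact format of `opts`
cfg.merge_from_list(["TRAINER.SUPR.SVD", "False", "DATASET.SUBSAMPLE_CLASSES", "new"])

assert cfg.TRAINER.SUPR.SVD is False
assert cfg.DATASET.SUBSAMPLE_CLASSES == "new"
```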
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE NEW CLASSES! 
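The two transform pipelines summarized in the next log block (train: random resized crop, random flip, tensor conversion, normalization; test: resize, center crop, tensor conversion, normalization) follow directly from the INPUT.* settings above. A rough torchvision equivalent is sketched here for illustration only; the project builds these pipelines through its own transform factory.

```
# Hedged sketch of the logged preprocessing, using plain torchvision transforms.
import torchvision.transforms as T

CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
CLIP_STD = [0.26862954, 0.26130258, 0.27577711]
BICUBIC = T.InterpolationMode.BICUBIC

transform_train = T.Compose([
    T.RandomResizedCrop(224, scale=(0.08, 1.0), interpolation=BICUBIC),
    T.RandomHorizontalFlip(),
    T.ToTensor(),                          # torch tensor in range [0, 1]
    T.Normalize(CLIP_MEAN, CLIP_STD),
])

transform_test = T.Compose([
    T.Resize(224, interpolation=BICUBIC),  # resize the smaller edge to 224
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(CLIP_MEAN, CLIP_STD),
])
```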
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 2,349 +# val 1,410 +# test 1,410 +--------- ------------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/oxford_flowers/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,410 +* correct: 1,058 +* accuracy: 75.0% +* error: 25.0% +* macro_f1: 69.6% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8dd0c3e10f6a2ae8204ed36b97474fe46b88b138 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/oxford_flowers/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: 
+ ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 2,349 +# val 1,410 +# test 1,410 +--------- ------------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/oxford_flowers/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,410 +* correct: 1,065 +* accuracy: 75.5% +* error: 24.5% +* macro_f1: 71.2% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c605a92ad44899e6defcc62717501052d2e6c30b --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/oxford_flowers/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + 
ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_flowers/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
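The config dump above (TRAINER.SUPR with ENSEMBLE_NUM: 3, N_CTX_TEXT/VISION: 4, SPACE_DIM: 7) together with the later "Turning off gradients in both the image and the text encoder" / "Parameters to be updated" messages shows that only prompt-related parameters remain trainable. Below is a minimal, hypothetical sketch of that selection step; picking parameters by the name substrings "VPT" and "prompt_learner" is an assumption for illustration, not the exact SuPrEns logic.

```python
# Minimal sketch (not the SuPrEns implementation): freeze every CLIP weight,
# then re-enable gradients only for prompt-related parameters, mirroring the
# "Parameters to be updated: {...}" messages printed in these logs.
import torch.nn as nn

def select_prompt_parameters(model: nn.Module, keywords=("VPT", "prompt_learner")):
    """Disable grads everywhere, re-enable them for parameters whose name
    contains one of the given keywords, and return the trainable names."""
    trainable = set()
    for name, param in model.named_parameters():
        requires = any(k in name for k in keywords)
        param.requires_grad_(requires)
        if requires:
            trainable.add(name)
    return trainable

# Usage (custom_clip is a hypothetical prompted CLIP module):
# enabled = select_prompt_parameters(custom_clip)
# print(f"Parameters to be updated: {enabled}")
```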
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE NEW CLASSES! 
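"SUBSAMPLE NEW CLASSES!" together with the "# classes 51" entry in the dataset summary that follows reflects the base-to-new protocol: Oxford Flowers has 102 classes and the "new" split evaluates on the second half. The sketch below illustrates that split; the rounding rule for odd class counts is an assumption, not the exact Dassl implementation.

```python
# Illustrative base/new class subsampling: cut the label set in half and keep
# the second half for the "new" evaluation split.
import math

def subsample_class_ids(num_classes: int, subsample: str):
    half = math.ceil(num_classes / 2)
    if subsample == "base":
        return list(range(half))
    if subsample == "new":
        return list(range(half, num_classes))
    return list(range(num_classes))  # "all"

print(len(subsample_class_ids(102, "new")))  # Oxford Flowers: 51 new classes
```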
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 2,349 +# val 1,410 +# test 1,410 +--------- ------------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 
'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/oxford_flowers/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,410 +* correct: 1,058 +* accuracy: 75.0% +* error: 25.0% +* macro_f1: 69.8% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5c94dad602319d3425c4f020c2c4f6dc2c35e83d --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/oxford_pets/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 
+ HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 18 +# train_x 1,436 +# val 1,788 +# test 1,788 +--------- ---------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 
'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 
'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/oxford_pets/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,788 +* correct: 1,747 +* accuracy: 97.7% +* error: 2.3% +* macro_f1: 97.7% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2aeb09f186e0313079508852fe9c467c2f48b46b --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/oxford_pets/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + 
GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 18 +# train_x 1,436 +# val 1,788 +# test 1,788 +--------- ---------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 
'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/oxford_pets/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,788 +* correct: 1,742 +* accuracy: 97.4% +* error: 2.6% +* macro_f1: 97.4% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8c4327618857b422fc8784528422f02fa8d0a1e0 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/oxford_pets/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: 
+OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/oxford_pets/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
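With `TEST.EVALUATOR: Classification`, each of these logs ends with a `=> result` block reporting total, correct, accuracy, error, and macro F1. The snippet below is a hedged sketch of how those quantities can be reproduced (using scikit-learn, which is an assumption, not necessarily what the evaluator itself uses); for the seed-2 run above, 1,742 / 1,788 ≈ 97.4% accuracy and 2.6% error, matching the printed numbers.

```python
import numpy as np
from sklearn.metrics import f1_score

def classification_result(y_true, y_pred):
    """Sketch of the quantities in the '=> result' block:
    total, correct, accuracy, error, and macro F1 (all in percent)."""
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    total = y_true.size
    correct = int((y_true == y_pred).sum())
    acc = 100.0 * correct / total            # e.g. 1742 / 1788 -> 97.4%
    return {
        "total": total,
        "correct": correct,
        "accuracy": acc,
        "error": 100.0 - acc,
        "macro_f1": 100.0 * f1_score(y_true, y_pred, average="macro"),
    }
```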
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE NEW CLASSES! 
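The `transform_train` / `transform_test` pipelines logged just below follow the standard CLIP preprocessing recorded in the `INPUT` block (bicubic interpolation, 224x224 crops, and CLIP's pixel mean/std). The torchvision composition below is a hedged equivalent of those logged pipelines, not the repository's own transform builder.

```python
from torchvision import transforms
from torchvision.transforms import InterpolationMode

CLIP_MEAN = (0.48145466, 0.4578275, 0.40821073)
CLIP_STD = (0.26862954, 0.26130258, 0.27577711)

# Equivalent of the logged transform_train: random resized crop to 224x224
# (scale 0.08-1.0), random horizontal flip, [0, 1] tensor, CLIP normalization.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.08, 1.0),
                                 interpolation=InterpolationMode.BICUBIC),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(CLIP_MEAN, CLIP_STD),
])

# Equivalent of the logged transform_test: resize the smaller edge to 224,
# take a 224x224 center crop, convert to a [0, 1] tensor, then normalize.
transform_test = transforms.Compose([
    transforms.Resize(224, interpolation=InterpolationMode.BICUBIC),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(CLIP_MEAN, CLIP_STD),
])
```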
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 18 +# train_x 1,436 +# val 1,788 +# test 1,788 +--------- ---------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 
'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/oxford_pets/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,788 +* correct: 1,750 +* accuracy: 97.9% +* error: 2.1% +* macro_f1: 97.9% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..1ec948f5e94ab640e73998ad0dabbc27db8d44d3 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/stanford_cars/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + 
ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE NEW CLASSES! 
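The `SUBSAMPLE NEW CLASSES!` message together with the `DATASET.SUBSAMPLE_CLASSES: new` option logged above corresponds to the usual base-to-new evaluation protocol: the dataset's label set is split in half, training uses only the "base" half, and this test run keeps only the held-out "new" half. The snippet below is a minimal editorial sketch of how such a split is commonly implemented in CoOp-style codebases; the `Datum` record and `subsample_classes` helper are hypothetical names for illustration, not code from this repository.

```python
# Hedged sketch (assumed convention, not this repo's implementation):
# keep the first half of the sorted labels for "base", the second half
# for "new", and relabel the kept classes to a contiguous 0..K-1 range.
from collections import namedtuple

Datum = namedtuple("Datum", ["impath", "label", "classname"])  # hypothetical record

def subsample_classes(items, subsample="new"):
    """Return only the items whose class falls in the requested half."""
    labels = sorted({item.label for item in items})
    half = len(labels) // 2
    selected = labels[:half] if subsample == "base" else labels[half:]
    relabel = {old: new for new, old in enumerate(selected)}
    return [
        Datum(item.impath, relabel[item.label], item.classname)
        for item in items
        if item.label in relabel
    ]
```

Under this convention, StanfordCars' 196 categories would leave 98 "new" classes, which matches the dataset summary printed in the log below.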
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,269 +# val 4,039 +# test 4,039 +--------- ------------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.2', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 
'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.4', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/stanford_cars/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,039 +* correct: 3,036 +* accuracy: 75.2% +* error: 24.8% +* macro_f1: 73.5% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b6cd5fb9adc58e2e4617b026a808ba1e039920ba --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/stanford_cars/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: 
+ ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,269 +# val 4,039 +# test 4,039 +--------- ------------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/stanford_cars/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,039 +* correct: 3,037 +* accuracy: 75.2% +* error: 24.8% +* macro_f1: 73.6% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7777c95b85ef109f6d1435fa9ed39f7a24ac3a06 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/stanford_cars/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + 
INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/stanford_cars/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,269 +# val 4,039 +# test 4,039 +--------- ------------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.0', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.0', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/stanford_cars/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,039 +* correct: 2,997 +* accuracy: 74.2% +* error: 25.8% +* macro_f1: 73.0% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ca72988182718c44d5baaafaa3cefbd52f745644 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/sun397/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 
'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 198 +# train_x 7,920 +# val 9,900 +# test 9,900 +--------- ------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.1', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 
'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 
'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/sun397/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 9,900 +* correct: 7,889 +* accuracy: 79.7% +* error: 20.3% +* macro_f1: 79.0% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..13306e8d891a9e65232802f8501664a17640c5a2 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/sun397/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu 
+ BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 198 +# train_x 7,920 +# val 9,900 +# test 9,900 +--------- ------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 
'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/sun397/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 9,900 +* correct: 7,909 +* accuracy: 79.9% +* error: 20.1% +* macro_f1: 79.1% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..21cad642783016a558f9b6b6a1d661ba84643f0e --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/sun397/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: 
() + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/sun397/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
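The `opts` list in the Arguments block above is a flat key/value sequence that is merged on top of the YAML config, which is why the printed Config shows `SVD: False` under `TRAINER.SUPR` and `SUBSAMPLE_CLASSES: new` under `DATASET`. A minimal sketch of that override step with yacs, using a stand-in config rather than the project's real config tree (which is built from `configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml` and `configs/datasets/sun397.yaml` before the overrides are applied):

```python
from yacs.config import CfgNode as CN

# Stand-in config with just the two keys touched by the command-line opts.
cfg = CN()
cfg.TRAINER = CN()
cfg.TRAINER.SUPR = CN()
cfg.TRAINER.SUPR.SVD = True
cfg.DATASET = CN()
cfg.DATASET.SUBSAMPLE_CLASSES = "all"

# The opts list from the Arguments block is applied last, overriding YAML values.
opts = ["TRAINER.SUPR.SVD", "False", "DATASET.SUBSAMPLE_CLASSES", "new"]
cfg.merge_from_list(opts)

print(cfg.TRAINER.SUPR.SVD)           # False
print(cfg.DATASET.SUBSAMPLE_CLASSES)  # new
```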
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE NEW CLASSES! 
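The `SUBSAMPLE NEW CLASSES!` message and the class count printed below (198 of SUN397's 397 classes) follow the usual base-to-new protocol from the CoOp/CoCoOp line of work: the sorted class list is split in half, the first half ("base") is used for training, and the remaining half ("new") is what gets evaluated here. A minimal sketch of that split, assuming this codebase follows the same convention (the function name and signature are illustrative, not taken from the repo):

```python
import math

def subsample_classes(labels, subsample="all"):
    """Split the label set in half: 'base' keeps the first half,
    'new' keeps the second half (CoCoOp-style base-to-new protocol)."""
    labels = sorted(set(labels))
    if subsample == "all":
        return labels
    m = math.ceil(len(labels) / 2)
    return labels[:m] if subsample == "base" else labels[m:]

# SUN397 has 397 classes -> 199 base / 198 new, matching "# classes 198" below.
print(len(subsample_classes(range(397), "new")))  # 198
# UCF101 (later in this diff) has 101 classes -> 51 base / 50 new.
print(len(subsample_classes(range(101), "new")))  # 50
```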
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 198 +# train_x 7,920 +# val 9,900 +# test 9,900 +--------- ------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 
'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 
'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 
'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/sun397/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 9,900 +* correct: 7,875 +* accuracy: 79.5% +* error: 20.5% +* macro_f1: 78.8% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8cb3223d325418f5e0f0c9ecf90eccc56e84f6bf --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/ucf101/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: 
True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
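The `=> result` summaries in these logs (the SUN397 runs above report 7,909/9,900 correct for seed 2 and 7,875/9,900 for seed 3) use the straightforward definitions sketched below; macro F1 additionally averages per-class F1 scores and cannot be recomputed from the summary counts alone. This is a sketch of the arithmetic only, not the evaluator's actual code:

```python
def summarize(correct, total):
    """Accuracy and error (in percent) as reported in the '=> result' block."""
    accuracy = 100.0 * correct / total
    return round(accuracy, 1), round(100.0 - accuracy, 1)

print(summarize(7909, 9900))  # (79.9, 20.1)  SUN397, seed 2
print(summarize(7875, 9900))  # (79.5, 20.5)  SUN397, seed 3
```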
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE NEW CLASSES! 
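The `transform_test` pipeline printed just below (resize the smaller edge to 224 with bicubic interpolation, 224x224 center crop, conversion to a [0, 1] tensor, normalization with the CLIP mean/std from `INPUT.PIXEL_MEAN` / `INPUT.PIXEL_STD`) corresponds to a standard torchvision composition. A minimal equivalent, written against torchvision's public API rather than the project's own transform builder:

```python
from torchvision import transforms
from torchvision.transforms import InterpolationMode

# CLIP preprocessing statistics, as listed under INPUT.PIXEL_MEAN / PIXEL_STD.
CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
CLIP_STD = [0.26862954, 0.26130258, 0.27577711]

transform_test = transforms.Compose([
    transforms.Resize(224, interpolation=InterpolationMode.BICUBIC),  # smaller edge -> 224
    transforms.CenterCrop(224),
    transforms.ToTensor(),  # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=CLIP_MEAN, std=CLIP_STD),
])
```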
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 50 +# train_x 3,713 +# val 1,849 +# test 1,849 +--------- ------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 
'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 
'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 
'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/ucf101/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,849 +* correct: 1,492 +* accuracy: 80.7% +* error: 19.3% +* macro_f1: 78.3% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a0a16659781fa911f40643ff7c502bdb66e54ac --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/ucf101/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 
0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
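The block above is a YACS-style configuration dump (Dassl.pytorch builds its config with yacs), produced after the defaults were overridden by the trainer YAML and by the `opts` listed in the Arguments section (`TRAINER.SUPR.SVD False`, `DATASET.SUBSAMPLE_CLASSES new`). The following is a minimal sketch of how such a config is typically assembled; the node names are copied from the dump, but the snippet is illustrative and not the repo's exact setup code.

```python
# Sketch: build a yacs config with a few of the defaults seen in the dump above,
# then apply command-line style overrides the way Dassl-like trainers do.
from yacs.config import CfgNode as CN

cfg = CN()
cfg.SEED = -1
cfg.OUTPUT_DIR = "./output"
cfg.DATASET = CN()
cfg.DATASET.NAME = ""
cfg.DATASET.SUBSAMPLE_CLASSES = "all"   # "all", "base", or "new"
cfg.TRAINER = CN()
cfg.TRAINER.NAME = ""
cfg.TRAINER.SUPR = CN()
cfg.TRAINER.SUPR.SVD = True

# In the real run a YAML file is merged first, e.g.
# cfg.merge_from_file("configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml")
# (commented out here so the sketch stays self-contained).

# Apply the opts shown in the Arguments section; yacs coerces the string
# values ("False" -> bool, "new" -> str) to match the default types.
cfg.merge_from_list(["TRAINER.SUPR.SVD", "False",
                     "DATASET.SUBSAMPLE_CLASSES", "new"])
cfg.freeze()
print(cfg.TRAINER.SUPR.SVD, cfg.DATASET.SUBSAMPLE_CLASSES)  # False new
```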
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE NEW CLASSES! 
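"SUBSAMPLE NEW CLASSES!" refers to the base-to-new evaluation protocol: the class list is split into two halves, training uses the "base" half, and this test run evaluates only on the held-out "new" half, which is why the table below reports 50 classes for UCF101 (101 classes in total). Below is a minimal sketch of that subsampling recipe, assuming a ceiling split at the midpoint (consistent with the 50 "new" classes reported); the function name and data layout are illustrative, not the repo's exact implementation.

```python
# Sketch: keep only the "base" (first half) or "new" (second half) classes of a
# dataset given as (image_path, label) pairs, re-indexing labels from 0.
import math
from typing import List, Tuple

def subsample_classes(items: List[Tuple[str, int]], subsample: str = "all"):
    """items: (image_path, label) pairs; subsample: 'all' | 'base' | 'new'."""
    if subsample == "all":
        return items
    labels = sorted({label for _, label in items})
    m = math.ceil(len(labels) / 2)      # e.g. 101 UCF101 classes -> 51 base / 50 new
    selected = labels[:m] if subsample == "base" else labels[m:]
    relabel = {old: new for new, old in enumerate(selected)}
    return [(path, relabel[lab]) for path, lab in items if lab in relabel]

# Tiny example: 5 classes, "new" keeps classes 3 and 4, relabelled to 0 and 1.
demo = [(f"img{i}.jpg", i) for i in range(5)]
print(subsample_classes(demo, "new"))   # [('img3.jpg', 0), ('img4.jpg', 1)]
```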
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 50 +# train_x 3,713 +# val 1,849 +# test 1,849 +--------- ------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 
'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 
'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 
'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 
'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 
'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/ucf101/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,849 +* correct: 1,477 +* accuracy: 79.9% +* error: 20.1% +* macro_f1: 77.6% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..27215e67b056802d9e63903df3a935e36cf8733d --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/ucf101/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + 
INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/test_new/ucf101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 50 +# train_x 3,713 +# val 1,849 +# test 1,849 +--------- ------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 
'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 
'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 
'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 
'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.4', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 
'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/ucf101/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,849 +* correct: 1,472 +* accuracy: 79.6% +* error: 20.4% +* macro_f1: 77.0% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f18b341f29bb28c301d7cbef96f637791b7b9bba --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/caltech101/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: 
+ ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 2,594 +# val 1,549 +# test 1,549 +--------- ---------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.prompt_learner.ctx_space.6', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 
'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/caltech101/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,549 +* correct: 1,521 +* accuracy: 98.2% +* error: 1.8% +* macro_f1: 96.3% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..16021b17afb5192e66d42312e2bd4f228b2b4be5 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/caltech101/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 
+ GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 2,594 +# val 1,549 +# test 1,549 +--------- ---------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 
'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 
'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/caltech101/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,549 +* correct: 1,521 +* accuracy: 98.2% +* error: 1.8% +* macro_f1: 96.4% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..0ea18e841d7c2f12ce60dbc6273477005c659651 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/caltech101/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + 
BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/caltech101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 2,594 +# val 1,549 +# test 1,549 +--------- ---------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 
'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/caltech101/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,549 +* correct: 1,520 +* accuracy: 98.1% +* error: 1.9% +* macro_f1: 96.2% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b3eda7398aade6e9557bff6b9541ac6fb6cdafe --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/dtd/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + 
HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE BASE CLASSES! 
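Editor's note: the `SUBSAMPLE BASE CLASSES!` message above refers to the base-to-new evaluation protocol, in which only the first half of a dataset's classes form the "base" split. Below is a minimal sketch of that split, assuming the standard CoOp-style convention (sorted labels, `ceil(n/2)` base classes) rather than this repository's exact code; DTD's 47 categories then give the 24 base classes reported in the dataset summary that follows.

```python
# Sketch only: the usual base/new class split assumed by base-to-new benchmarks.
import math

def split_labels(labels, subsample="base"):
    labels = sorted(set(labels))
    m = math.ceil(len(labels) / 2)            # DTD: 47 classes -> 24 base / 23 new
    return labels[:m] if subsample == "base" else labels[m:]

print(len(split_labels(range(47), "base")))   # 24, matching "# classes 24" in the log
```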
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 24 +# train_x 1,440 +# val 864 +# test 864 +--------- ------------------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/dtd/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 864 +* correct: 724 +* accuracy: 83.8% +* error: 16.2% +* macro_f1: 83.7% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9611bc8cb101b0087e4f5df419058b66b159c08 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/dtd/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 
+ GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 24 +# train_x 1,440 +# val 864 +# test 864 +--------- ------------------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 
'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 
'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/dtd/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 864 +* correct: 724 +* accuracy: 83.8% +* error: 16.2% +* macro_f1: 83.7% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9527f999eff54c2181a942c88b9f0e296b6e4ee6 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/dtd/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu 
+ BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/dtd/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 24 +# train_x 1,440 +# val 864 +# test 864 +--------- ------------------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx', 
'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 
'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 
'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/dtd/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 864 +* correct: 721 +* accuracy: 83.4% +* error: 16.6% +* macro_f1: 83.3% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e77713d81ed67edba3d7705abbc1d1438b0c5de5 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/eurosat/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + 
ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 7,000 +# val 4,200 +# test 4,200 +--------- ------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/eurosat/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,200 +* correct: 3,947 +* accuracy: 94.0% +* error: 6.0% +* macro_f1: 94.0% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..eedf812aa6363174a1da2b4a32968f87f41c2f10 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/eurosat/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 
+ HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 7,000 +# val 4,200 +# test 4,200 +--------- ------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.4', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.5', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 
'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/eurosat/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,200 +* correct: 3,997 +* accuracy: 95.2% +* error: 4.8% +* macro_f1: 95.2% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0607204ca708a0a997bc3943c2cfb1cef710418 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/eurosat/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 
+ LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/eurosat/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
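The config dump above is the result of layering the trainer YAML (`configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml`), the dataset YAML, and the command-line `opts` pairs from the Arguments block on top of the framework defaults. As a rough, hypothetical sketch of the yacs override semantics (not code from this repository, and assuming yacs is installed): only the YAML path and the opts list are taken from this log; the default values and node layout are assumptions.

```python
# Hypothetical sketch of the yacs override mechanics behind the config dump above;
# only the YAML path and the opts list come from this log, the rest is assumed.
from yacs.config import CfgNode as CN

cfg = CN(new_allowed=True)                 # let merge_from_file add YAML-only keys
cfg.DATASET = CN(new_allowed=True)
cfg.DATASET.SUBSAMPLE_CLASSES = "all"      # assumed default
cfg.TRAINER = CN(new_allowed=True)
cfg.TRAINER.SUPR = CN(new_allowed=True)
cfg.TRAINER.SUPR.SVD = True                # assumed default

# cfg.merge_from_file("configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml")
opts = ["TRAINER.SUPR.SVD", "False", "DATASET.SUBSAMPLE_CLASSES", "base"]
cfg.merge_from_list(opts)                  # opts override the YAML/default values
cfg.freeze()

print(cfg.TRAINER.SUPR.SVD)                # -> False
print(cfg.DATASET.SUBSAMPLE_CLASSES)       # -> 'base'
```

Running the sketch prints `False` and `base`, matching `SVD: False` and `SUBSAMPLE_CLASSES: base` in the dump above.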
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE BASE CLASSES! 
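`SUBSAMPLE BASE CLASSES!` means only the base half of the label set is kept for this base-to-new run (`DATASET.SUBSAMPLE_CLASSES: base`). A minimal sketch of the usual CoOp-style split convention, not code taken from this repository:

```python
# Illustrative base/new split for base-to-new evaluation (assumed CoOp-style
# convention: first half of the sorted labels is "base", the rest is "new").
import math

def subsample_labels(labels, subsample="base"):
    labels = sorted(set(labels))
    m = math.ceil(len(labels) / 2)
    return labels[:m] if subsample == "base" else labels[m:]

# EuroSAT has 10 classes, so the base split keeps 5 of them, which is
# consistent with the "# classes 5" line in the dataset summary below.
print(subsample_labels(range(10), "base"))  # [0, 1, 2, 3, 4]
print(subsample_labels(range(10), "new"))   # [5, 6, 7, 8, 9]
```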
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 7,000 +# val 4,200 +# test 4,200 +--------- ------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/eurosat/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,200 +* correct: 4,016 +* accuracy: 95.6% +* error: 4.4% +* macro_f1: 95.7% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..28da0bc3f8bd025cecb04a07131a10ae09e8a10f --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed1/log.txt @@ -0,0 +1,353 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/fgvc_aircraft/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + 
ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
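The config dump above fixes the SuPr-specific hyper-parameters for this run: a 3-member ensemble (SuPrEns), 4 text and 4 vision context tokens, prompt depth 9, a 7-dimensional prompt subspace (SPACE_DIM, matching ctx_space.0–6 in the parameter lists), and SVD disabled through the command-line opts. As a minimal sketch — assuming only the keys shown in this log, not the repository's actual default-config code — these fields could be declared and overridden with yacs, the configuration library Dassl builds on:

```
# Hypothetical sketch: declaring/overriding the SuPr options shown in the
# config dump with yacs. Attribute layout simply mirrors the logged keys.
from yacs.config import CfgNode as CN

cfg = CN()
cfg.TRAINER = CN()
cfg.TRAINER.SUPR = CN()
cfg.TRAINER.SUPR.ENSEMBLE_NUM = 3        # number of SuPr members in SuPrEns
cfg.TRAINER.SUPR.N_CTX_TEXT = 4          # learnable text context tokens
cfg.TRAINER.SUPR.N_CTX_VISION = 4        # learnable vision (VPT) tokens
cfg.TRAINER.SUPR.PROMPT_DEPTH_TEXT = 9   # deep prompting depth (cf. VPT params on resblocks 1-8)
cfg.TRAINER.SUPR.PROMPT_DEPTH_VISION = 9
cfg.TRAINER.SUPR.SPACE_DIM = 7           # prompt-subspace size (ctx_space.0 ... ctx_space.6)
cfg.TRAINER.SUPR.SVD = True

# Command-line overrides like the opts list in the arguments block are
# typically merged as alternating key/value strings:
cfg.merge_from_list(["TRAINER.SUPR.SVD", "False"])
print(cfg.TRAINER.SUPR.ENSEMBLE_NUM, cfg.TRAINER.SUPR.SPACE_DIM, cfg.TRAINER.SUPR.SVD)
```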
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: FGVCAircraft +SUBSAMPLE BASE CLASSES! 
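The `SUBSAMPLE BASE CLASSES!` message refers to the base-to-new evaluation protocol: only the first half of the dataset's classes (the "base" split) is used in this run, which is why the dataset summary below lists 50 of FGVC Aircraft's 100 variants. A small, hypothetical sketch of that split follows — it illustrates the common CoOp-style convention, not necessarily this repository's exact subsampling code:

```
# Hypothetical illustration of the base-to-new class split; the "base" run
# keeps the first half of the class list, the "new" run keeps the rest.
import math
from typing import List

def split_base_new(classnames: List[str], subsample: str = "base") -> List[str]:
    """Return the 'base' half, the 'new' half, or all class names."""
    if subsample == "all":
        return list(classnames)
    n_base = math.ceil(len(classnames) / 2)
    return classnames[:n_base] if subsample == "base" else classnames[n_base:]

# With DATASET.SUBSAMPLE_CLASSES = "base", the 100 FGVC Aircraft variants
# reduce to 50 base classes, as reported in the dataset summary below.
```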
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,666 +# test 1,666 +--------- ------------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 
'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/fgvc_aircraft/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,666 +* correct: 673 +* accuracy: 40.4% +* error: 59.6% +* macro_f1: 37.8% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..803421b579b0bb3b632bdeb0202b90598c51bb8f --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed2/log.txt @@ -0,0 +1,353 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/fgvc_aircraft/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + 
DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: FGVCAircraft +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,666 +# test 1,666 +--------- ------------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 
'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/fgvc_aircraft/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,666 +* correct: 680 +* accuracy: 40.8% +* error: 59.2% +* macro_f1: 38.5% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..80d97fb3f587bdab5472f253a6b2239f3cff14a8 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed3/log.txt @@ -0,0 +1,353 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/fgvc_aircraft/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + 
ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/fgvc_aircraft/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: FGVCAircraft +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,666 +# test 1,666 +--------- ------------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 
'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/fgvc_aircraft/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,666 +* correct: 665 +* accuracy: 39.9% +* error: 60.1% +* macro_f1: 37.4% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2405c99483c746b8f0891160e952a14011a49c9d --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/food101/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + 
PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 51 +# train_x 25,500 +# val 15,300 +# test 15,300 +--------- ------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 
'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/food101/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 15,300 +* correct: 13,917 +* accuracy: 91.0% +* error: 9.0% +* macro_f1: 91.0% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c55930b6f1aa0b8d9e082f740a02494276dc0a1c --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/food101/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 
0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 51 +# train_x 25,500 +# val 15,300 +# test 15,300 +--------- ------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 
'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/food101/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 15,300 +* correct: 13,916 +* accuracy: 91.0% +* error: 9.0% +* macro_f1: 90.9% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..aa91e12be83462947464e9d0c444a8006e3014e7 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/food101/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 
+ HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/food101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
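The `** Config **` block above is a yacs `CfgNode` dump produced after the dataset and trainer YAML files are merged into the defaults and the command-line `opts` from the `** Arguments **` section are applied last. A minimal, self-contained sketch of that final override step is shown below (stand-in default values; the real run also calls `merge_from_file` on the two YAMLs listed in the arguments).

```
# Illustrative sketch of how the `opts` list overrides the printed config (yacs).
from yacs.config import CfgNode as CN

# Stand-in defaults mirroring two keys from the dump above.
cfg = CN()
cfg.DATASET = CN()
cfg.DATASET.SUBSAMPLE_CLASSES = "all"
cfg.TRAINER = CN()
cfg.TRAINER.SUPR = CN()
cfg.TRAINER.SUPR.SVD = True

# The dataset/trainer YAMLs would be merged first via cfg.merge_from_file(...);
# the command-line opts take highest priority and are merged last.
cfg.merge_from_list(["TRAINER.SUPR.SVD", "False",
                     "DATASET.SUBSAMPLE_CLASSES", "base"])
cfg.freeze()
print(cfg.TRAINER.SUPR.SVD, cfg.DATASET.SUBSAMPLE_CLASSES)   # False base
```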
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 51 +# train_x 25,500 +# val 15,300 +# test 15,300 +--------- ------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.3', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 
'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 
'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/food101/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 15,300 +* correct: 13,875 +* accuracy: 90.7% +* error: 9.3% +* macro_f1: 90.7% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f81a79906aaa257c70aea34c8a20565f5d71ac63 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + 
ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
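The pair of messages that appears after each "build model vit is True" line in these logs, "Turning off gradients in both the image and the text encoder" followed by "Parameters to be updated: {...}", reflects the usual prompt-tuning recipe: the pretrained CLIP weights stay frozen and only the prompt-related tensors (the `prompt_learner.ctx` / `ctx_space.*` vectors and the per-block `VPT_*` scale, bias and token parameters) receive gradients. The snippet below is an illustrative sketch of that pattern only, assuming a PyTorch module and simple name-based matching; it is not the repository's exact implementation.

```python
# Illustrative sketch only (assumed helper names; not the repository's exact code):
# freeze the pretrained CLIP weights and leave only prompt-related tensors
# trainable, mirroring the "Turning off gradients ..." / "Parameters to be
# updated: {...}" messages printed in these logs.
import torch.nn as nn

def freeze_all_but_prompts(model: nn.Module, keywords=("VPT", "prompt_learner")):
    enabled = set()
    for name, param in model.named_parameters():
        if any(k in name for k in keywords):
            param.requires_grad_(True)   # prompt / VPT parameters stay trainable
            enabled.add(name)
        else:
            param.requires_grad_(False)  # everything else (CLIP backbone) is frozen
    print("Turning off gradients in both the image and the text encoder")
    print(f"Parameters to be updated: {enabled}")
    return enabled
```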
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE BASE CLASSES! 
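The "SUBSAMPLE BASE CLASSES!" message above corresponds to the `DATASET.SUBSAMPLE_CLASSES: base` option in the config. As a rough sketch of the common base-to-new convention (an assumption, not verified against this repository's code), the class list is sorted and split in half, with the first half kept as "base" classes; this is consistent with the dataset table below reporting 500 of ImageNet's 1,000 classes.

```python
# Illustrative sketch (assumed convention, not this repo's exact implementation):
# keep only the "base" half of the sorted class list for training/evaluation.
def subsample_classes(items, subsample="base"):
    """items: list of (image_path, label) pairs; keep only base or new labels."""
    labels = sorted({label for _, label in items})
    half = len(labels) // 2          # e.g. ImageNet: 1000 classes -> 500 base
    keep = set(labels[:half] if subsample == "base" else labels[half:])
    return [(path, label) for path, label in items if label in keep]
```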
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 642,289 +# val 25,000 +# test 25,000 +--------- -------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.6', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.image_encoder.VPT'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 19,620 +* accuracy: 78.5% +* error: 21.5% +* macro_f1: 78.2% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d57e0bae8f1db4670a5fef70cb7d6abaeb71bc50 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: 
True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
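The `opts` shown in the arguments block above are merged on top of the YAML config, which is how `TRAINER.SUPR.SVD` ends up `False` and `DATASET.SUBSAMPLE_CLASSES` ends up `base` for this evaluation-only run. Below is a minimal sketch of that override pattern with yacs (the config library Dassl builds on); the default values here are illustrative placeholders, not the repo's actual defaults.

```python
from yacs.config import CfgNode as CN

# Illustrative defaults only; the real defaults live in the repo's config files.
cfg = CN()
cfg.TRAINER = CN()
cfg.TRAINER.SUPR = CN()
cfg.TRAINER.SUPR.SVD = True            # assumed default, overridden below
cfg.DATASET = CN()
cfg.DATASET.SUBSAMPLE_CLASSES = "all"  # assumed default, overridden below

# The dataset and trainer YAML files would normally be merged first, e.g.:
# cfg.merge_from_file("configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml")

# Command-line opts are merged last, so they win over the YAML values.
opts = ["TRAINER.SUPR.SVD", "False", "DATASET.SUBSAMPLE_CLASSES", "base"]
cfg.merge_from_list(opts)
cfg.freeze()

print(cfg.TRAINER.SUPR.SVD)            # False
print(cfg.DATASET.SUBSAMPLE_CLASSES)   # base
```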
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE BASE CLASSES! 
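The long "missing keys" lists and "Parameters to be updated" sets that appear throughout these logs come from two standard steps when a frozen CLIP backbone is extended with learnable prompt/VPT parameters: the pretrained state dict is loaded non-strictly, so the newly added VPT tensors are reported as missing rather than raising an error, and gradients are then disabled for everything except the prompt-related parameters. The following is a minimal PyTorch sketch of both patterns, assuming a simple name-based filter on "VPT" and "prompt_learner"; the exact rule used by SuPr/SuPrEns may differ.

```python
import torch
import torch.nn as nn

class TinyEncoder(nn.Module):
    """Stand-in for the modified CLIP encoder: one pretrained layer plus a new VPT tensor."""
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(8, 8)                  # present in the pretrained checkpoint
        self.VPT = nn.Parameter(torch.zeros(4, 8))   # newly added, absent from the checkpoint

model = TinyEncoder()

# Simulate a pretrained checkpoint that knows nothing about the VPT parameter.
pretrained = {"proj.weight": torch.randn(8, 8), "proj.bias": torch.zeros(8)}

# Non-strict loading: extra model parameters are reported instead of raising an error,
# which is what produces the long "missing keys" lists in the log.
result = model.load_state_dict(pretrained, strict=False)
print("missing keys:", result.missing_keys)          # ['VPT']

# Freeze everything except prompt-related parameters, then collect the trainable names,
# mirroring the "Parameters to be updated" sets printed by the trainer.
for name, param in model.named_parameters():
    param.requires_grad_("VPT" in name or "prompt_learner" in name)

enabled = {name for name, p in model.named_parameters() if p.requires_grad}
print("Parameters to be updated:", enabled)
```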
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 642,289 +# val 25,000 +# test 25,000 +--------- -------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 19,528 +* accuracy: 78.1% +* error: 21.9% +* macro_f1: 77.8% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..35e8db8a44ef8b250a96b22286495c6aa94a8468 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 
+ BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/imagenet/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 642,289 +# val 25,000 +# test 25,000 +--------- -------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.2', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 19,622 +* accuracy: 78.5% +* error: 21.5% +* macro_f1: 78.2% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b576f57b70f387b9740239b2e7e4c456753b447 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/oxford_flowers/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 
'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 1,744 +# val 1,053 +# test 1,053 +--------- ------------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.1', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/oxford_flowers/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,053 +* correct: 1,029 +* accuracy: 97.7% +* error: 2.3% +* macro_f1: 97.7% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b027e77a06e6a060ca63168a009c94167d6517b5 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/oxford_flowers/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: 
() + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 1,744 +# val 1,053 +# test 1,053 +--------- ------------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 
'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 
'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.prompt_learner.ctx_space.5', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/oxford_flowers/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,053 +* correct: 1,035 +* accuracy: 98.3% +* error: 1.7% +* macro_f1: 98.4% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ada28a3db7308895e6c9ce18a2f0c9f9d75e3bbb --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/oxford_flowers/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: 
+OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_flowers/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 1,744 +# val 1,053 +# test 1,053 +--------- ------------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/oxford_flowers/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,053 +* correct: 1,036 +* accuracy: 98.4% +* error: 1.6% +* macro_f1: 98.3% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..0e0ab06670992869221e5265bf791cb8da6646c8 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/oxford_pets/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True 
+ DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 19 +# train_x 1,508 +# val 1,881 +# test 1,881 +--------- ---------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/oxford_pets/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,881 +* correct: 1,807 +* accuracy: 96.1% +* error: 3.9% +* macro_f1: 96.1% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b576cab35d503318c904e263905b7db1af2f9575 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/oxford_pets/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + 
HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
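The `OPTIM` block above specifies SGD at `LR: 0.0025` with `LR_SCHEDULER: cosine` over `MAX_EPOCH: 10`, preceded by a one-epoch constant warmup at `WARMUP_CONS_LR: 1e-05`. The sketch below approximates such a schedule with plain PyTorch; it is not the trainer's actual scheduler, and details such as how the warmup epoch is counted may differ.

```
# Approximate sketch (not the trainer's implementation) of the schedule implied by
# the OPTIM block: constant warmup at 1e-5 for one epoch, then cosine decay of the
# base learning rate 0.0025 over the remaining epochs.
import math
import torch

module = torch.nn.Linear(512, 19)  # stand-in for the trainable prompt parameters
optimizer = torch.optim.SGD(module.parameters(), lr=0.0025,
                            momentum=0.9, weight_decay=5e-4)

def lr_at_epoch(epoch, base_lr=2.5e-3, warmup_lr=1e-5, warmup_epochs=1, max_epochs=10):
    if epoch < warmup_epochs:                        # WARMUP_TYPE: constant
        return warmup_lr
    progress = (epoch - warmup_epochs) / max(1, max_epochs - warmup_epochs)
    return 0.5 * base_lr * (1.0 + math.cos(math.pi * progress))

for epoch in range(10):
    for group in optimizer.param_groups:
        group["lr"] = lr_at_epoch(epoch)
    # ... one training epoch over the few-shot batches would run here ...
```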
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE BASE CLASSES! 
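As noted a few lines above, the split is read from a pre-computed JSON file (`split_zhou_OxfordPets.json`) rather than re-sampled at run time. A minimal reader could look like the sketch below; the per-record layout assumed here (`[relative_path, label, classname]` under `train`/`val`/`test` keys) is an illustration, not a documented format.

```
# Hypothetical reader for a pre-computed split file such as split_zhou_OxfordPets.json.
# The assumed JSON layout -- {"train"/"val"/"test": [[rel_path, label, classname], ...]}
# -- is for illustration only and may not match the file exactly.
import json
import os

def read_split(split_path: str, image_root: str):
    with open(split_path) as f:
        split = json.load(f)

    def build(records):
        return [(os.path.join(image_root, p), int(y), c) for p, y, c in records]

    return build(split["train"]), build(split["val"]), build(split["test"])
```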
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 19 +# train_x 1,508 +# val 1,881 +# test 1,881 +--------- ---------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 
'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 
'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/oxford_pets/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,881 +* correct: 1,796 +* accuracy: 95.5% +* error: 4.5% +* macro_f1: 95.5% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..255451585d0742114b1c24000b92062f9571b966 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/oxford_pets/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + 
ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/oxford_pets/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 19 +# train_x 1,508 +# val 1,881 +# test 1,881 +--------- ---------- +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 
'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.prompt_learner.ctx_space.1', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/oxford_pets/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,881 +* correct: 1,796 +* accuracy: 95.5% +* error: 4.5% +* macro_f1: 95.5% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2c4986267ba82e2bcb8034830926a03436e0047e --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/stanford_cars/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + 
NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,240 +# val 4,002 +# test 4,002 +--------- ------------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.2', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.5'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/stanford_cars/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,002 +* correct: 3,076 +* accuracy: 76.9% +* error: 23.1% +* macro_f1: 76.2% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2456ae9b85b4c18c54b7e45e497caa9b7061ccfb --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/stanford_cars/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: 
relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info **
+PyTorch version: 1.10.2+cu113
+Is debug build: False
+CUDA used to build PyTorch: 11.3
+ROCM used to build PyTorch: N/A
+
+OS: Ubuntu 22.04.5 LTS (x86_64)
+GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0
+Clang version: Could not collect
+CMake version: Could not collect
+Libc version: glibc-2.35
+
+Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime)
+Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10
+Is CUDA available: True
+CUDA runtime version: 11.5.119
+GPU models and configuration:
+GPU 0: NVIDIA GeForce RTX 4090
+GPU 1: NVIDIA GeForce RTX 4090
+GPU 2: NVIDIA GeForce RTX 4090
+GPU 3: NVIDIA GeForce RTX 4090
+GPU 4: NVIDIA GeForce RTX 4090
+GPU 5: NVIDIA GeForce RTX 4090
+GPU 6: NVIDIA GeForce RTX 4090
+GPU 7: NVIDIA GeForce RTX 4090
+
+Nvidia driver version: 550.90.07
+cuDNN version: Could not collect
+HIP runtime version: N/A
+MIOpen runtime version: N/A
+
+Versions of relevant libraries:
+[pip3] numpy==1.24.3
+[pip3] open-clip-torch==2.20.0
+[pip3] torch==1.10.2+cu113
+[pip3] torchaudio==0.10.2+cu113
+[pip3] torchvision==0.11.3+cu113
+[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free
+[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge
+[conda] ffmpeg 4.3 hf484d3e_0 pytorch
+[conda] mkl 2023.1.0 h6d00ec8_46342
+[conda] mkl-service 2.4.0 py38h5eee18b_1
+[conda] mkl_fft 1.3.6 py38h417a72b_1
+[conda] mkl_random 1.2.2 py38h417a72b_1
+[conda] numpy 1.24.3 py38hf6e8229_1
+[conda] numpy-base 1.24.3 py38h060ed82_1
+[conda] open-clip-torch 2.20.0 pypi_0 pypi
+[conda] pytorch-mutex 1.0 cpu pytorch
+[conda] torch 1.10.2+cu113 pypi_0 pypi
+[conda] torchaudio 0.10.2+cu113 pypi_0 pypi
+[conda] torchvision 0.11.3+cu113 pypi_0 pypi
+ Pillow (9.4.0)
+
+Loading trainer: SuPrEns
+Loading dataset: StanfordCars
+Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json
+SUBSAMPLE BASE CLASSES!
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,240 +# val 4,002 +# test 4,002 +--------- ------------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 
'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 
'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 
'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/stanford_cars/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,002 +* correct: 3,071 +* accuracy: 76.7% +* error: 23.3% +* macro_f1: 76.4% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..fd978a0d8b6fb0a48008e852bbac02e1d3569720 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/stanford_cars/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: 
+ ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/stanford_cars/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,240 +# val 4,002 +# test 4,002 +--------- ------------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.2', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/stanford_cars/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 4,002 +* correct: 3,138 +* accuracy: 78.4% +* error: 21.6% +* macro_f1: 78.0% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..03808a801d48b407bf83b711158b488735ed7299 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/sun397/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 
'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 199 +# train_x 7,960 +# val 9,950 +# test 9,950 +--------- ------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.image_encoder.VPT', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 
'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/sun397/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 9,950 +* correct: 8,227 +* accuracy: 82.7% +* error: 17.3% +* macro_f1: 82.5% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6c46b3e500c6209e54cd39690b43076abdcd8271 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed2/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/sun397/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + 
NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 199 +# train_x 7,960 +# val 9,950 +# test 9,950 +--------- ------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 
'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 
'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 
'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/sun397/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 9,950 +* correct: 8,256 +* accuracy: 83.0% +* error: 17.0% +* macro_f1: 82.8% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..fcf63b1b782c4a27c32ee0432a507dbd675b6aee --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/sun397/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + 
ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/sun397/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 199 +# train_x 7,960 +# val 9,950 +# test 9,950 +--------- ------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 
'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 
'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some 
missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 
'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/sun397/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 9,950 +* correct: 8,245 +* accuracy: 82.9% +* error: 17.1% +* macro_f1: 82.7% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed1/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d68b5a35335597cd895011b37b419b8e02ea0ab4 --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed1/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/ucf101/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + 
INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 51 +# train_x 3,926 +# val 1,934 +# test 1,934 +--------- ------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 
'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 
'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 
'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 
'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 
'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.1', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 
'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 
'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5'}
+Loading evaluator: Classification
+ensemble_model.0.prompt_learner.token_prefix
+ensemble_model.1.prompt_learner.token_prefix
+ensemble_model.2.prompt_learner.token_prefix
+Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/ucf101/shots_16/seed1/SubspacePromptLearner/model.pth.tar-10" (epoch = 10)
+Evaluate on the *test* set
+=> result
+* total: 1,934
+* correct: 1,687
+* accuracy: 87.2%
+* error: 12.8%
+* macro_f1: 86.5%
diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed2/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed2/log.txt
new file mode 100644
index 0000000000000000000000000000000000000000..96edc4a404c19454b4a014bb4628316aa38b951e
--- /dev/null
+++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed2/log.txt
@@ -0,0 +1,354 @@
+***************
+** Arguments **
+***************
+backbone: 
+config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml
+dataset_config_file: configs/datasets/ucf101.yaml
+eval_only: True
+head: 
+load_epoch: 10
+model_dir: weights/base2new/SuPrEns/ucf101/shots_16/seed2
+no_train: False
+opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base']
+output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed2
+resume: 
+root: /mnt/sdb/data/datasets
+seed: 2
+source_domains: None
+target_domains: None
+trainer: SuPrEns
+transforms: None
+************
+** Config **
+************
+DATALOADER:
+  K_TRANSFORMS: 1
+  NUM_WORKERS: 8
+  RETURN_IMG0: False
+  TEST:
+    BATCH_SIZE: 100
+    SAMPLER: SequentialSampler
+  TRAIN_U:
+    BATCH_SIZE: 32
+    N_DOMAIN: 0
+    N_INS: 16
+    SAME_AS_X: True
+    SAMPLER: RandomSampler
+  TRAIN_X:
+    BATCH_SIZE: 4
+    N_DOMAIN: 0
+    N_INS: 16
+    SAMPLER: RandomSampler
+DATASET:
+  ALL_AS_UNLABELED: False
+  CIFAR_C_LEVEL: 1
+  CIFAR_C_TYPE: 
+  NAME: UCF101
+  NUM_LABELED: -1
+  NUM_SHOTS: -1
+  ROOT: /mnt/sdb/data/datasets
+  SOURCE_DOMAINS: ()
+  STL10_FOLD: -1
+  SUBSAMPLE_CLASSES: base
+  TARGET_DOMAINS: ()
+  VAL_PERCENT: 0.1
+INPUT:
+  COLORJITTER_B: 0.4
+  COLORJITTER_C: 0.4
+  COLORJITTER_H: 0.1
+  COLORJITTER_S: 0.4
+  CROP_PADDING: 4
+  CUTOUT_LEN: 16
+  CUTOUT_N: 1
+  GB_K: 21
+  GB_P: 0.5
+  GN_MEAN: 0.0
+  GN_STD: 0.15
+  INTERPOLATION: bicubic
+  NO_TRANSFORM: False
+  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
+  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
+  RANDAUGMENT_M: 10
+  RANDAUGMENT_N: 2
+  RGS_P: 0.2
+  RRCROP_SCALE: (0.08, 1.0)
+  SIZE: (224, 224)
+  TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize')
+MODEL:
+  BACKBONE:
+    NAME: ViT-B/16
+    PRETRAINED: True
+  HEAD:
+    ACTIVATION: relu
+    BN: True
+    DROPOUT: 0.0
+    HIDDEN_LAYERS: ()
+    NAME: 
+  INIT_WEIGHTS: 
+OPTIM:
+  ADAM_BETA1: 0.9
+  ADAM_BETA2: 0.999
+  BASE_LR_MULT: 0.1
+  GAMMA: 0.1
+  LR: 0.0025
+  LR_EXP: 6.5
+  LR_SCHEDULER: cosine
+  MAX_EPOCH: 10
+  MOMENTUM: 0.9
+  NAME: sgd
+  NEW_LAYERS: ['linear_probe', 'film']
+  RMSPROP_ALPHA: 0.99
+  SGD_DAMPNING: 0
+  SGD_NESTEROV: False
+  STAGED_LR: False
+  STEPSIZE: (-1,)
+  WARMUP_CONS_LR: 1e-05
+  WARMUP_EPOCH: 1
+  WARMUP_MIN_LR: 1e-05
+  WARMUP_RECOUNT: True
+  WARMUP_TYPE: constant
+  WEIGHT_DECAY: 0.0005
+OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed2
+RESUME: 
+SEED: 2
+TEST:
+  COMPUTE_CMAT: False
+  EVALUATOR: Classification
+  FINAL_MODEL: last_step
+  NO_TEST: False
+  PER_CLASS_RESULT: False
+  SPLIT: test
+TRAIN:
+  CHECKPOINT_FREQ: 0
+  COUNT_ITER: train_x
+  PRINT_FREQ: 20
+TRAINER:
+  CDAC:
+    CLASS_LR_MULTI: 10
+    P_THRESH: 0.95
+    RAMPUP_COEF: 30
+    RAMPUP_ITRS: 1000
+    STRONG_TRANSFORMS: ()
+    TOPK_MATCH: 5
+  COCOOP:
+    CTX_INIT: 
+    N_CTX: 16
+    PREC: fp16
+  COOP:
+    CLASS_TOKEN_POSITION: end
+    CSC: False
+    CTX_INIT: 
+    N_CTX: 16
+    PREC: fp16
+    W: 8.0
+  CROSSGRAD:
+    ALPHA_D: 0.5
+    ALPHA_F: 0.5
+    EPS_D: 1.0
+    EPS_F: 1.0
+  DAEL:
+    CONF_THRE: 0.95
+    STRONG_TRANSFORMS: ()
+    WEIGHT_U: 0.5
+  DAELDG:
+    CONF_THRE: 0.95
+    STRONG_TRANSFORMS: ()
+    WEIGHT_U: 0.5
+  DDAIG:
+    ALPHA: 0.5
+    CLAMP: False
+    CLAMP_MAX: 1.0
+    CLAMP_MIN: -1.0
+    G_ARCH: 
+    LMDA: 0.3
+    WARMUP: 0
+  DOMAINMIX:
+    ALPHA: 1.0
+    BETA: 1.0
+    TYPE: crossdomain
+  ENTMIN:
+    LMDA: 0.001
+  FILM:
+    LINEAR_PROBE: True
+  FIXMATCH:
+    CONF_THRE: 0.95
+    STRONG_TRANSFORMS: ()
+    WEIGHT_U: 1.0
+  IVLP:
+    CTX_INIT: a photo of a
+    N_CTX_TEXT: 2
+    N_CTX_VISION: 2
+    PREC: fp16
+    PROMPT_DEPTH_TEXT: 9
+    PROMPT_DEPTH_VISION: 9
+  LINEAR_PROBE:
+    TEST_TIME_FUSION: True
+    TYPE: linear
+    WEIGHT: 0.3
+  M3SDA:
+    LMDA: 0.5
+    N_STEP_F: 4
+  MAPLE:
+    CTX_INIT: a photo of a
+    N_CTX: 2
+    PREC: fp16
+    PROMPT_DEPTH: 9
+  MCD:
+    N_STEP_F: 4
+  MEANTEACHER:
+    EMA_ALPHA: 0.999
+    RAMPUP: 5
+    WEIGHT_U: 1.0
+  MIXMATCH:
+    MIXUP_BETA: 0.75
+    RAMPUP: 20000
+    TEMP: 2.0
+    WEIGHT_U: 100.0
+  MME:
+    LMDA: 0.1
+  NAME: SuPrEns
+  PROMPTSRC:
+    CTX_INIT: a photo of a
+    GPA_MEAN: 15
+    GPA_STD: 1
+    IMAGE_LOSS_WEIGHT: 10
+    N_CTX_TEXT: 4
+    N_CTX_VISION: 4
+    PREC: fp16
+    PROMPT_DEPTH_TEXT: 9
+    PROMPT_DEPTH_VISION: 9
+    TEXT_LOSS_WEIGHT: 25
+  SE:
+    CONF_THRE: 0.95
+    EMA_ALPHA: 0.999
+    RAMPUP: 300
+  SUPR:
+    CTX_INIT: a photo of a
+    ENSEMBLE_NUM: 3
+    HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/
+    LAMBDA: 0.7
+    N_CTX_TEXT: 4
+    N_CTX_VISION: 4
+    PREC: fp16
+    PROMPT_DEPTH_TEXT: 9
+    PROMPT_DEPTH_VISION: 9
+    REG_LOSS_WEIGHT: 60
+    SPACE_DIM: 7
+    SVD: False
+    TRAINER_BACKBONE: SuPr
+  TCP:
+    CLASS_TOKEN_POSITION: end
+    CSC: False
+    CTX_INIT: 
+    N_CTX: 4
+    PREC: fp16
+    W: 1.0
+USE_CUDA: True
+VERBOSE: True
+VERSION: 1
+Collecting env info ...
+** System info **
+PyTorch version: 1.10.2+cu113
+Is debug build: False
+CUDA used to build PyTorch: 11.3
+ROCM used to build PyTorch: N/A
+
+OS: Ubuntu 22.04.5 LTS (x86_64)
+GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0
+Clang version: Could not collect
+CMake version: Could not collect
+Libc version: glibc-2.35
+
+Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime)
+Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10
+Is CUDA available: True
+CUDA runtime version: 11.5.119
+GPU models and configuration: 
+GPU 0: NVIDIA GeForce RTX 4090
+GPU 1: NVIDIA GeForce RTX 4090
+GPU 2: NVIDIA GeForce RTX 4090
+GPU 3: NVIDIA GeForce RTX 4090
+GPU 4: NVIDIA GeForce RTX 4090
+GPU 5: NVIDIA GeForce RTX 4090
+GPU 6: NVIDIA GeForce RTX 4090
+GPU 7: NVIDIA GeForce RTX 4090
+
+Nvidia driver version: 550.90.07
+cuDNN version: Could not collect
+HIP runtime version: N/A
+MIOpen runtime version: N/A
+
+Versions of relevant libraries:
+[pip3] numpy==1.24.3
+[pip3] open-clip-torch==2.20.0
+[pip3] torch==1.10.2+cu113
+[pip3] torchaudio==0.10.2+cu113
+[pip3] torchvision==0.11.3+cu113
+[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free
+[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge
+[conda] ffmpeg 4.3 hf484d3e_0 pytorch
+[conda] mkl 2023.1.0 h6d00ec8_46342
+[conda] mkl-service 2.4.0 py38h5eee18b_1
+[conda] mkl_fft 1.3.6 py38h417a72b_1
+[conda] mkl_random 1.2.2 py38h417a72b_1
+[conda] numpy 1.24.3 py38hf6e8229_1
+[conda] numpy-base 1.24.3 py38h060ed82_1
+[conda] open-clip-torch 2.20.0 pypi_0 pypi
+[conda] pytorch-mutex 1.0 cpu pytorch
+[conda] torch 1.10.2+cu113 pypi_0 pypi
+[conda] torchaudio 0.10.2+cu113 pypi_0 pypi
+[conda] torchvision 0.11.3+cu113 pypi_0 pypi
+        Pillow (9.4.0)
+
+Loading trainer: SuPrEns
+Loading dataset: UCF101
+Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json
+SUBSAMPLE BASE CLASSES!
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 51 +# train_x 3,926 +# val 1,934 +# test 1,934 +--------- ------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 
'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 
'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 
'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 
'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 
'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 
'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 
'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 
'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/ucf101/shots_16/seed2/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,934 +* correct: 1,676 +* accuracy: 86.7% +* error: 13.3% +* macro_f1: 85.9% diff --git a/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed3/log.txt b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7573fe44d02e51bf6a522edd92767015c708f4cf --- /dev/null +++ b/output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed3/log.txt @@ -0,0 +1,354 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep10_batch4_4+4ctx.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 10 +model_dir: weights/base2new/SuPrEns/ucf101/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False', 'DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPrEns +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + 
ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SuPrEns/reproduce_vit_b16_ep10_batch4_4+4ctx/train_base/ucf101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPrEns + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPrEns +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE BASE CLASSES! 
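[Editor's note] The `SUBSAMPLE BASE CLASSES!` message above and the `# classes 51` row in the dataset summary below reflect the base-to-new evaluation protocol requested via `DATASET.SUBSAMPLE_CLASSES: base` in the arguments: UCF101's 101 classes are split in half and only the first (base) half is used for this run. The following is a minimal sketch of how such a split typically works in CoOp-style codebases; the `subsample_classes` helper name and signature are hypothetical illustrations, not the repository's exact implementation.

```python
import math

def subsample_classes(labels, subsample="base"):
    """Keep the first half ('base') or second half ('new') of the sorted class ids.

    Hypothetical helper illustrating the base-to-new split; not the repo's API.
    """
    classes = sorted(set(labels))
    m = math.ceil(len(classes) / 2)   # e.g. ceil(101 / 2) = 51 for UCF101
    if subsample == "base":
        keep = classes[:m]            # first half of the sorted class ids
    elif subsample == "new":
        keep = classes[m:]            # remaining classes, evaluated separately
    else:
        keep = classes                # "all": no subsampling
    return set(keep)

# Example: 101 UCF101 classes -> 51 base classes, matching "# classes 51" below.
print(len(subsample_classes(range(101), "base")))  # -> 51
```

Under this convention the "new" split holds the remaining 50 classes, which is why base and new logs for the same dataset report different class counts.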
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 51 +# train_x 3,926 +# val 1,934 +# test 1,934 +--------- ------ +Building custom CLIP +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 
'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 
'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 
'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 
'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.2', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias2', 
'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.2.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.1.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.0', 'ensemble_model.2.image_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.0.prompt_learner.ctx_space.4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_shallow', 
'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.0.prompt_learner.ctx_space.1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.prompt_learner.ctx_space.0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.prompt_learner.ctx_space.2', 'ensemble_model.2.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.prompt_learner.ctx_space.3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale2', 
'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias1', 'ensemble_model.1.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.image_encoder.VPT', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.0.prompt_learner.ctx_space.3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.0.prompt_learner.ctx_space.6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.1', 'ensemble_model.1.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.1.prompt_learner.ctx', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias1', 'ensemble_model.2.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.2.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.1.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias5', 
'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias0', 'ensemble_model.0.prompt_learner.ctx_space.0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.image_encoder.VPT', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.prompt_learner.ctx', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias4', 'ensemble_model.0.image_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias5', 'ensemble_model.1.image_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale6', 
'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias0', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.1.image_encoder.VPT', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.1.image_encoder.transformer.resblocks.6.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.2.prompt_learner.ctx_space.6', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias0', 'ensemble_model.1.prompt_learner.ctx_space.3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_bias2', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias6', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_bias0', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale4', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale5', 'ensemble_model.2.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_bias5', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.prompt_learner.ctx_space.4', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_scale1', 'ensemble_model.0.image_encoder.transformer.resblocks.1.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias6', 'ensemble_model.0.image_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_bias6', 
'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.6.VPT_scale3', 'ensemble_model.1.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.1.text_encoder.transformer.resblocks.1.VPT_scale2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias4', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_bias0', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.0.image_encoder.transformer.resblocks.4.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale1', 'ensemble_model.0.prompt_learner.ctx', 'ensemble_model.0.text_encoder.transformer.resblocks.7.VPT_scale5', 'ensemble_model.1.text_encoder.transformer.resblocks.7.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias6', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias1', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.4.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_scale6', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale1', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.text_encoder.transformer.resblocks.8.VPT_scale2', 'ensemble_model.1.prompt_learner.ctx_space.5', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias4', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias5', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_scale0', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.1.prompt_learner.ctx_space.6', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.4.VPT_bias3', 'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_scale2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale4', 'ensemble_model.1.text_encoder.transformer.resblocks.2.VPT_bias3', 'ensemble_model.2.text_encoder.transformer.resblocks.7.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_bias5', 'ensemble_model.2.prompt_learner.ctx_space.1', 'ensemble_model.1.text_encoder.transformer.resblocks.3.VPT_shallow', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_scale0', 'ensemble_model.0.text_encoder.transformer.resblocks.8.VPT_scale4', 'ensemble_model.0.prompt_learner.ctx_space.2', 'ensemble_model.2.text_encoder.transformer.resblocks.5.VPT_bias5', 
'ensemble_model.1.text_encoder.transformer.resblocks.8.VPT_bias5', 'ensemble_model.2.image_encoder.transformer.resblocks.7.VPT_shallow', 'ensemble_model.2.text_encoder.transformer.resblocks.6.VPT_bias2', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_bias3', 'ensemble_model.0.text_encoder.transformer.resblocks.3.VPT_scale1', 'ensemble_model.0.text_encoder.transformer.resblocks.5.VPT_scale3', 'ensemble_model.0.text_encoder.transformer.resblocks.2.VPT_bias2', 'ensemble_model.1.text_encoder.transformer.resblocks.6.VPT_scale0', 'ensemble_model.1.text_encoder.transformer.resblocks.5.VPT_bias6', 'ensemble_model.2.text_encoder.transformer.resblocks.1.VPT_scale4', 'ensemble_model.2.text_encoder.transformer.resblocks.3.VPT_bias1', 'ensemble_model.0.text_encoder.transformer.resblocks.1.VPT_scale5'} +Loading evaluator: Classification +ensemble_model.0.prompt_learner.token_prefix +ensemble_model.1.prompt_learner.token_prefix +ensemble_model.2.prompt_learner.token_prefix +Loading weights to SubspacePromptLearner from "weights/base2new/SuPrEns/ucf101/shots_16/seed3/SubspacePromptLearner/model.pth.tar-10" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 1,934 +* correct: 1,662 +* accuracy: 85.9% +* error: 14.1% +* macro_f1: 85.2% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..0a80d5cb1ad5bd5f7dbad5cb93e3abe439035dcd --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/caltech101/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + 
BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 1,534 +# val 916 +# test 916 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias6', 
'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias6', 
'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'prompt_learner.ctx_space.7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale4', 
'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale0'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/caltech101/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 916 +* correct: 865 +* accuracy: 94.4% +* error: 5.6% +* macro_f1: 94.8% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fbd46086b4efbfe45a9cf6bf9ef8476c083f027 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/caltech101/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 1,534 +# val 916 +# test 916 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 
'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.2', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'prompt_learner.ctx_space.6', 'prompt_learner.ctx_space.3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 
'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 
'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/caltech101/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 916 +* correct: 862 +* accuracy: 94.1% +* error: 5.9% +* macro_f1: 94.6% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..73f68a5ba15b2ccdbb4c7530090fc3c3d85e7b4d --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/caltech101/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/caltech101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 1,534 +# val 916 +# test 916 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 
'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 
'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx', 
'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale4'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/caltech101/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 916 +* correct: 873 +* accuracy: 95.3% +* error: 4.7% +* macro_f1: 95.6% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..943924723a2cd149421e62caf30d4ac37de15eb8 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/dtd/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + 
EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 23 +# train_x 1,380 +# val 828 +# test 828 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 
'transformer.resblocks.4.VPT_scale7', 'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 
'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 
'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 
'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/dtd/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 828 +* correct: 522 +* accuracy: 63.0% +* error: 37.0% +* macro_f1: 61.5% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b11d3dd688c809c304d5b47c85ad369b8b80b915 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/dtd/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 23 +# train_x 1,380 +# val 828 +# test 828 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 
'transformer.resblocks.4.VPT_scale7', 'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale1', 
'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.VPT', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale7', 
'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 
'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias3'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/dtd/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 828 +* correct: 536 +* accuracy: 64.7% +* error: 35.3% +* macro_f1: 62.9% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..78611217771dc1ecd4c6ac7169321999e1674005 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/dtd/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/dtd/shots_16/seed3 +RESUME: 
+SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
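The Config dump above is a yacs-style config printed after merging the trainer YAML (`config_file`), the dataset YAML, and the command-line `opts` (here `['DATASET.SUBSAMPLE_CLASSES', 'new']`). A minimal sketch of that merge is given below; `get_default_cfg` is a hypothetical stand-in for the project's default-config builder, and only a few keys are modeled.

```python
from yacs.config import CfgNode as CN

def get_default_cfg() -> CN:
    # Hypothetical stand-in for the project's default config; real defaults
    # contain all keys shown in the Config dump above.
    cfg = CN()
    cfg.SEED = -1
    cfg.DATASET = CN()
    cfg.DATASET.NAME = ""
    cfg.DATASET.SUBSAMPLE_CLASSES = "all"
    return cfg

cfg = get_default_cfg()
# cfg.merge_from_file("configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml")
# cfg.merge_from_file("configs/datasets/dtd.yaml")
cfg.merge_from_list(["DATASET.SUBSAMPLE_CLASSES", "new", "SEED", "3"])  # mirrors the opts/seed above
cfg.freeze()
print(cfg.DATASET.SUBSAMPLE_CLASSES)  # -> "new"
```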
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE NEW CLASSES! 
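The `Turning off gradients in both the image and the text encoder`, `Parameters to be updated`, and `Parameters count: 154` messages in these logs indicate that only prompt-related tensors (the `VPT_scale*`/`VPT_bias*` parameters, the `VPT_shallow` tokens, and `prompt_learner.ctx*`) remain trainable. A minimal sketch of that freezing pattern in plain PyTorch follows; the keyword substrings are taken from the log, but the selection logic is illustrative rather than the repository's exact code.

```python
import torch.nn as nn

def freeze_all_but_prompts(model: nn.Module, keywords=("VPT", "prompt_learner")):
    """Keep requires_grad=True only for parameters whose name contains one of
    the prompt-related keywords seen in the log above; freeze everything else."""
    enabled = set()
    for name, param in model.named_parameters():
        if any(k in name for k in keywords):
            param.requires_grad_(True)
            enabled.add(name)
        else:
            param.requires_grad_(False)
    print(f"Parameters to be updated: {enabled}")
    print(f"Parameters count: {len(enabled)}")
    return enabled
```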
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 23 +# train_x 1,380 +# val 828 +# test 828 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 
'transformer.resblocks.4.VPT_scale7', 'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 
'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 
'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'prompt_learner.ctx_space.4', 
'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias2'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/dtd/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 828 +* correct: 519 +* accuracy: 62.7% +* error: 37.3% +* macro_f1: 61.6% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..779474caf21ae8d4d1a857e66f27042b5c0aa608 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/eurosat/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
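Each `=> result` block in these logs (total / correct / accuracy / error / macro_f1) is produced by the `Classification` evaluator. The sketch below shows how those numbers can be reproduced from predicted and ground-truth labels; using scikit-learn's `f1_score` for the macro-F1 is an assumption about the metric definition, not the evaluator's exact code.

```python
import numpy as np
from sklearn.metrics import f1_score

def summarize(preds: np.ndarray, labels: np.ndarray) -> dict:
    total = len(labels)
    correct = int((preds == labels).sum())
    acc = 100.0 * correct / total
    return {
        "total": total,
        "correct": correct,
        "accuracy": acc,            # e.g. 536/828 -> 64.7% (DTD, seed2 above)
        "error": 100.0 - acc,
        "macro_f1": 100.0 * f1_score(labels, preds, average="macro"),
    }
```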
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 6,500 +# val 3,900 +# test 3,900 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale5', 
'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'prompt_learner.ctx_space.4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 
'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 
'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'prompt_learner.ctx_space.0'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/eurosat/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 3,900 +* correct: 3,152 +* accuracy: 80.8% +* error: 19.2% +* macro_f1: 78.9% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..38c086a74eaed5233dc1449f18f2a9fd419b7b72 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/eurosat/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed2 +RESUME: 
+SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 6,500 +# val 3,900 +# test 3,900 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 
'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.6', 
'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 
'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale2'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/eurosat/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 3,900 +* correct: 3,182 +* accuracy: 81.6% +* error: 18.4% +* macro_f1: 80.6% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ba20d89097f57a4c9da3b69cb96ce092c13ead77 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/eurosat/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/eurosat/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + 
COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 6,500 +# val 3,900 +# test 3,900 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 
'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 
'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 
'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale1'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/eurosat/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 3,900 +* correct: 3,141 +* accuracy: 80.5% +* error: 19.5% +* macro_f1: 76.7% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c76eb26681aeb6ec06bef60331405fcce76e1a6f --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed1/log.txt @@ -0,0 +1,351 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/fgvc_aircraft/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: FGVCAircraft +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,667 +# test 1,667 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 
'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 
'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 
'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale1'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/fgvc_aircraft/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,667 +* correct: 645 +* accuracy: 38.7% +* error: 61.3% +* macro_f1: 35.3% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d9ca0d1648b62545acf7a3d93de0829684ff66e1 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed2/log.txt @@ -0,0 +1,351 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/fgvc_aircraft/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: FGVCAircraft +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,667 +# test 1,667 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.7.VPT_bias1', 
'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'prompt_learner.ctx_space.4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale1', 
'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 
'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale5'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/fgvc_aircraft/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,667 +* correct: 640 +* accuracy: 38.4% +* error: 61.6% +* macro_f1: 34.3% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7d18fac480088ad7465983c4f30e8d99d61f101a --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed3/log.txt @@ -0,0 +1,351 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/fgvc_aircraft/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/fgvc_aircraft/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: FGVCAircraft +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,667 +# test 1,667 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'prompt_learner.ctx', 
'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'prompt_learner.ctx_space.5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.1', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 
'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 
'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale7'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/fgvc_aircraft/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,667 +* correct: 637 +* accuracy: 38.2% +* error: 61.8% +* macro_f1: 33.9% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b06080e7db137ae576422beca390ecd8824fd9b2 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/food101/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + 
COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 50 +# train_x 25,000 +# val 15,000 +# test 15,000 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 
'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias7', 
'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'prompt_learner.ctx_space.6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 
'text_encoder.transformer.resblocks.2.VPT_bias5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.7', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.1'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/food101/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 15,000 +* correct: 13,699 +* accuracy: 91.3% +* error: 8.7% +* macro_f1: 91.3% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8ee63e395ac8fccf3cf34dea5a88c9e2e2853256 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/food101/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed2 
+RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 50 +# train_x 25,000 +# val 15,000 +# test 15,000 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 
'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 
'text_encoder.transformer.resblocks.3.VPT_scale1', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 
'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'prompt_learner.ctx_space.5'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/food101/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 15,000 +* correct: 13,767 +* accuracy: 91.8% +* error: 8.2% +* macro_f1: 91.8% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c5562e89312c61c80211cf4addaf4926377c3f58 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/food101/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/food101/shots_16/seed3 +RESUME: 
+SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 50 +# train_x 25,000 +# val 15,000 +# test 15,000 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'prompt_learner.ctx', 
'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 
'text_encoder.transformer.resblocks.3.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 
'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale2'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/food101/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 15,000 +* correct: 13,736 +* accuracy: 91.6% +* error: 8.4% +* macro_f1: 91.6% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f8eecf8c1d278b64728484e1456339dba705f2f4 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/imagenet/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 638,834 +# val 25,000 +# test 25,000 +--------- -------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 
'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 
'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'prompt_learner.ctx_space.2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 
'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/imagenet/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 17,510 +* accuracy: 70.0% +* error: 30.0% +* macro_f1: 69.2% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ddc78c772328a7085f705b97f804ff466e429440 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/imagenet/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + 
COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 638,834 +# val 25,000 +# test 25,000 +--------- -------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_scale2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale0', 
'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 
'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 
'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/imagenet/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 17,465 +* accuracy: 69.9% +* error: 30.1% +* macro_f1: 68.9% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c3d63e270229856dd8bc93e998b27d8049a73cc6 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/imagenet/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/imagenet/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 638,834 +# val 25,000 +# test 25,000 +--------- -------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 
'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 
'text_encoder.transformer.resblocks.8.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 
'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale3'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/imagenet/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 17,444 +* accuracy: 69.8% +* error: 30.2% +* macro_f1: 68.9% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8d30bfafbde9b474c64c08289e72082fee26235c --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/oxford_flowers/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 2,349 +# val 1,410 +# test 1,410 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 
'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 
'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 
'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_scale6'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/oxford_flowers/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,410 +* correct: 1,066 +* accuracy: 75.6% +* error: 24.4% +* macro_f1: 71.4% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..62b61b152c6ee6917caf534125cbf67a942ffeaa --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/oxford_flowers/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed2 
+RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 2,349 +# val 1,410 +# test 1,410 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale7', 
'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx_space.6', 
'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'prompt_learner.ctx_space.7', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'prompt_learner.ctx_space.4', 
'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias3'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/oxford_flowers/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,410 +* correct: 1,095 +* accuracy: 77.7% +* error: 22.3% +* macro_f1: 73.3% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..0f8ef180223c75e38a614bd01f9a99d450c6ab98 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/oxford_flowers/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_flowers/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 2,349 +# val 1,410 +# test 1,410 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'prompt_learner.ctx_space.3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 
'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 
'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias7', 
'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale1'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/oxford_flowers/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,410 +* correct: 1,078 +* accuracy: 76.5% +* error: 23.5% +* macro_f1: 71.4% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b3caf098a97e226783ac05c0c946244ef02d4b75 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/oxford_pets/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed1 
+RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 18 +# train_x 1,436 +# val 1,788 +# test 1,788 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'prompt_learner.ctx_space.4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 
'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 
'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'prompt_learner.ctx_space.7', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 
'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/oxford_pets/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,788 +* correct: 1,735 +* accuracy: 97.0% +* error: 3.0% +* macro_f1: 97.0% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ecee2ae8c6e072610584979194ab09376c505d91 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/oxford_pets/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed2 
+RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 18 +# train_x 1,436 +# val 1,788 +# test 1,788 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias7', 
'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx_space.7', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 
'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'prompt_learner.ctx_space.6', 
'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale6'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/oxford_pets/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,788 +* correct: 1,737 +* accuracy: 97.1% +* error: 2.9% +* macro_f1: 97.2% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..79ddac72ec8c159b8b83acf8bd46c43b8cc12193 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/oxford_pets/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/oxford_pets/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 18 +# train_x 1,436 +# val 1,788 +# test 1,788 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 
'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias0', 
'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 
'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/oxford_pets/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,788 +* correct: 1,738 +* accuracy: 97.2% +* error: 2.8% +* macro_f1: 97.2% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..168547c88b4f79b0ea71393cd455d3d02b1992a6 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/stanford_cars/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,269 +# val 4,039 +# test 4,039 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias5', 
'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 
'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'prompt_learner.ctx_space.6', 
'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale3'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/stanford_cars/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 4,039 +* correct: 3,040 +* accuracy: 75.3% +* error: 24.7% +* macro_f1: 74.2% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7cdcdb07cb0fc2736353f0bad6f439a10c0aa4fb --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/stanford_cars/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,269 +# val 4,039 +# test 4,039 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 
'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 
'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'prompt_learner.ctx_space.7', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'prompt_learner.ctx_space.6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 
'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale4'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/stanford_cars/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 4,039 +* correct: 3,040 +* accuracy: 75.3% +* error: 24.7% +* macro_f1: 74.1% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7b0464085c776dafc7c8168b50c75522d0a92d19 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/stanford_cars/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/stanford_cars/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,269 +# val 4,039 +# test 4,039 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 
'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias7', 
'text_encoder.transformer.resblocks.2.VPT_scale6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 
'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias6'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/stanford_cars/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 4,039 +* correct: 3,047 +* accuracy: 75.4% +* error: 24.6% +* macro_f1: 74.4% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..715f3e77b57b777b562af730bdc76db5b361c0d4 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/sun397/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed1 +RESUME: 
+SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 198 +# train_x 7,920 +# val 9,900 +# test 9,900 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 
'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'prompt_learner.ctx_space.7', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 
'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 
'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale2'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/sun397/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 9,900 +* correct: 7,785 +* accuracy: 78.6% +* error: 21.4% +* macro_f1: 77.7% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..deb9eb27eae2d837f3add97b8fd329ba1aa59cd8 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/sun397/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: 
False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 198 +# train_x 7,920 +# val 9,900 +# test 9,900 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 
'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 
'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'prompt_learner.ctx_space.1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale6', 
'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/sun397/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 9,900 +* correct: 7,760 +* accuracy: 78.4% +* error: 21.6% +* macro_f1: 77.5% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..365c9ff7ea71703f46b5273e00526094164c8c18 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/sun397/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/sun397/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False 
+ EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 198 +# train_x 7,920 +# val 9,900 +# test 9,900 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias1', 
'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 
'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 
'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale0'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/sun397/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 9,900 +* correct: 7,792 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 77.8% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c24ca40b6f618603ae8495c70aaed8cf9a5273e2 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/ucf101/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + 
EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 50 +# train_x 3,713 +# val 1,849 +# test 1,849 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 
'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'prompt_learner.ctx_space.4', 
'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'prompt_learner.ctx_space.3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 
'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias0'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/ucf101/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,849 +* correct: 1,478 +* accuracy: 79.9% +* error: 20.1% +* macro_f1: 77.4% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b73d1e3bf737c2875b9ef48a206bcfa9a5be4b88 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/ucf101/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed2 +RESUME: +SEED: 2 
+TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 50 +# train_x 3,713 +# val 1,849 +# test 1,849 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.3', 'prompt_learner.ctx_space.7', 'image_encoder.transformer.resblocks.1.VPT_shallow', 
'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 
'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 
'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale0'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/ucf101/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,849 +* correct: 1,462 +* accuracy: 79.1% +* error: 20.9% +* macro_f1: 77.3% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8e75f38a2a1421b8900d34305271d0d865e169b6 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/ucf101/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'new'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: new + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/test_new/ucf101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + 
EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE NEW CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 50 +# train_x 3,713 +# val 1,849 +# test 1,849 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'image_encoder.transformer.resblocks.7.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx_space.4', 'prompt_learner.ctx_space.2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx_space.7', 
'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 
'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/ucf101/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,849 +* correct: 1,456 +* accuracy: 78.7% +* error: 21.3% +* macro_f1: 76.4% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..df6a70b2bbd01b0c1b04b7bf0d35d1c176a30734 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/caltech101/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed1 
+RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 2,594 +# val 1,549 +# test 1,549 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 
'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.4', 
'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 
'text_encoder.transformer.resblocks.1.VPT_bias0', 'prompt_learner.ctx_space.2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias3'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/caltech101/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,549 +* correct: 1,515 +* accuracy: 97.8% +* error: 2.2% +* macro_f1: 95.5% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8210af69fca82068bbb7ce86bc96462347cc24aa --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/caltech101/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 2,594 +# val 1,549 +# test 1,549 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 
'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'prompt_learner.ctx_space.2', 'prompt_learner.ctx_space.6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 
'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'image_encoder.transformer.resblocks.2.VPT_shallow', 
'prompt_learner.ctx_space.3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias2'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/caltech101/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,549 +* correct: 1,524 +* accuracy: 98.4% +* error: 1.6% +* macro_f1: 97.0% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9956f372d4dfda3842713369afe263817c972f69 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/caltech101/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/caltech101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 50 +# train_x 2,594 +# val 1,549 +# test 1,549 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 
'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 
'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 
'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias1'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/caltech101/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,549 +* correct: 1,519 +* accuracy: 98.1% +* error: 1.9% +* macro_f1: 96.3% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..de5414c2f9cc6250cc6b130712cfdc8d7038166f --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/dtd/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: 
False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 24 +# train_x 1,440 +# val 864 +# test 864 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 
'transformer.resblocks.4.VPT_scale7', 'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'prompt_learner.ctx_space.3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 
'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias6', 
'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias1'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/dtd/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 864 +* correct: 724 +* accuracy: 83.8% +* error: 16.2% +* macro_f1: 83.6% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..594c0ca788f2767d4eb78da919a19239a1911b4a --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/dtd/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 24 +# train_x 1,440 +# val 864 +# test 864 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 
'transformer.resblocks.4.VPT_scale7', 'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 
'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 
'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'prompt_learner.ctx_space.7', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 
'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale6'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/dtd/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 864 +* correct: 721 +* accuracy: 83.4% +* error: 16.6% +* macro_f1: 83.3% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e86e60ae1ac90ceb21aad9d47cc62e44b79ad11 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/dtd/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/dtd/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 24 +# train_x 1,440 +# val 864 +# test 864 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 
'transformer.resblocks.4.VPT_scale7', 'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 
'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 
'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 
'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_scale3'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/dtd/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 864 +* correct: 730 +* accuracy: 84.5% +* error: 15.5% +* macro_f1: 84.2% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5203db5ca00e6d2952e32a5a8312966f183a90b0 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/eurosat/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 7,000 +# val 4,200 +# test 4,200 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 
'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'prompt_learner.ctx_space.3', 
'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale1', 
'text_encoder.transformer.resblocks.5.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale4'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/eurosat/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 4,200 +* correct: 3,914 +* accuracy: 93.2% +* error: 6.8% +* macro_f1: 93.2% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f77060e26e2a59841e82ea641451cca202ca8e02 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/eurosat/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + 
COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 7,000 +# val 4,200 +# test 4,200 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_bias6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_shallow', 
'text_encoder.transformer.resblocks.7.VPT_bias3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 
'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 
'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.6.VPT_shallow'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/eurosat/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 4,200 +* correct: 3,978 +* accuracy: 94.7% +* error: 5.3% +* macro_f1: 94.8% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c0bd80113c5a87e6bad167047308950062bda38f --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/eurosat/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/eurosat/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 5 +# train_x 7,000 +# val 4,200 +# test 4,200 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 
'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'prompt_learner.ctx_space.5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.6.VPT_shallow', 
'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 
'text_encoder.transformer.resblocks.7.VPT_bias3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias4'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/eurosat/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 4,200 +* correct: 3,962 +* accuracy: 94.3% +* error: 5.7% +* macro_f1: 94.4% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e455108ec3a16ed1276344f79fa525249263f915 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed1/log.txt @@ -0,0 +1,351 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/fgvc_aircraft/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: FGVCAircraft +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,666 +# test 1,666 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 
'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 
'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'prompt_learner.ctx_space.2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'prompt_learner.ctx_space.7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 
'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/fgvc_aircraft/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,666 +* correct: 712 +* accuracy: 42.7% +* error: 57.3% +* macro_f1: 40.4% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..07014788c99bc9906f6be9e1683e977b3f1038f7 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed2/log.txt @@ -0,0 +1,351 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/fgvc_aircraft/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: FGVCAircraft +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,666 +# test 1,666 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 
'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias3', 
'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 
'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale1'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/fgvc_aircraft/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,666 +* correct: 747 +* accuracy: 44.8% +* error: 55.2% +* macro_f1: 43.2% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c1c432daa6afe964fea94bf519b9dd373f2174de --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed3/log.txt @@ -0,0 +1,351 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/fgvc_aircraft/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/fgvc_aircraft/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: FGVCAircraft +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 50 +# train_x 1,667 +# val 1,666 +# test 1,666 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 
'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 
'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 
'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias6'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/fgvc_aircraft/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,666 +* correct: 705 +* accuracy: 42.3% +* error: 57.7% +* macro_f1: 40.5% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6f5473397d35e7a8cd0c620ca3bd0dfe747db64a --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/food101/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 51 +# train_x 25,500 +# val 15,300 +# test 15,300 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 
'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 
'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 
'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale2'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/food101/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 15,300 +* correct: 13,844 +* accuracy: 90.5% +* error: 9.5% +* macro_f1: 90.5% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..4d8a91d562e9c1ec9148e4c5be0f3f251becc4c6 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/food101/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 51 +# train_x 25,500 +# val 15,300 +# test 15,300 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'prompt_learner.ctx_space.0', 
'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.5', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias7', 
'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 
'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.4.VPT_scale3'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/food101/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 15,300 +* correct: 13,908 +* accuracy: 90.9% +* error: 9.1% +* macro_f1: 90.9% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b5d7ae978e49f834ea380ef3da2dd0cd72ba55f3 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/food101/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/food101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
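Editorial note on the "Turning off gradients in both the image and the text encoder" step: each run above reports 154 trainable tensors, and every listed name contains either `prompt_learner` or `VPT` (the learnable context vectors `prompt_learner.ctx` / `prompt_learner.ctx_space.*`, the per-layer shallow prompts `*.VPT_shallow`, their `VPT_scale*` / `VPT_bias*` terms, and `image_encoder.VPT`). A minimal sketch of that selection, assuming name matching on those two substrings (the exact keywords are an assumption, not taken from the trainer code):

```python
import torch.nn as nn

def enable_prompt_params_only(model: nn.Module, keywords=("prompt_learner", "VPT")):
    """Freeze every parameter except those whose name matches a prompt keyword.

    The keyword list is an assumption chosen to reproduce the names printed in
    these logs (prompt_learner.ctx, prompt_learner.ctx_space.*, *.VPT_shallow,
    *.VPT_scale*/VPT_bias*, image_encoder.VPT).
    """
    enabled = set()
    for name, param in model.named_parameters():
        keep = any(k in name for k in keywords)
        param.requires_grad_(keep)
        if keep:
            enabled.add(name)
    return enabled

# Usage with a hypothetical custom CLIP model:
# enabled = enable_prompt_params_only(custom_clip)
# print(f"Parameters to be updated: {enabled}")
# print(f"Parameters count: {len(enabled)}")  # 154 in the runs logged here
```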
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +SUBSAMPLE BASE CLASSES! 
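The `opts: ['DATASET.SUBSAMPLE_CLASSES', 'base']` argument together with the `SUBSAMPLE BASE CLASSES!` message means only the base half of the label set is evaluated here: Food101 keeps 51 of its 101 classes (see the dataset summary just below), and the ImageNet runs later keep 500 of 1000. A minimal sketch of that split, assuming the base half is the first ceil(C/2) labels:

```python
import math

def subsample_classes(samples, subsample="base"):
    """Keep only base (first half, rounded up) or new (second half) classes.

    `samples` is assumed to be a list of (image_path, label, classname) tuples
    with integer labels 0..C-1; the rounding convention matches 101 -> 51 base
    classes for Food101 and 1000 -> 500 for ImageNet.
    """
    labels = sorted({label for _, label, _ in samples})
    m = math.ceil(len(labels) / 2)
    keep = set(labels[:m]) if subsample == "base" else set(labels[m:])
    return [s for s in samples if s[1] in keep]
```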
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 51 +# train_x 25,500 +# val 15,300 +# test 15,300 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 
'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'prompt_learner.ctx_space.2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 
'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.7', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 
'text_encoder.transformer.resblocks.1.VPT_bias5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'image_encoder.transformer.resblocks.7.VPT_shallow'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/food101/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 15,300 +* correct: 13,860 +* accuracy: 90.6% +* error: 9.4% +* macro_f1: 90.6% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6a9ee84017f8953aa035572a311c3c5bcc4f0aad --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/imagenet/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + 
COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
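Because `eval_only` is `True`, these runs train nothing: the prompt weights saved under `model_dir` are restored at `load_epoch` 20 and evaluated directly (the `Loading weights to VLPromptLearner from ".../model.pth.tar-20"` lines further down). A minimal, hypothetical sketch of that restore step; the checkpoint layout and the dropped class-token buffers are assumptions:

```python
import torch

def load_prompt_checkpoint(model, model_dir, epoch):
    # File naming follows the paths printed in these logs:
    #   <model_dir>/VLPromptLearner/model.pth.tar-<epoch>
    path = f"{model_dir}/VLPromptLearner/model.pth.tar-{epoch}"
    checkpoint = torch.load(path, map_location="cpu")
    state_dict = checkpoint["state_dict"]  # assumed checkpoint layout
    # Tokenized class-name buffers depend on which classes are evaluated,
    # so they are dropped before loading (assumption).
    for key in ("prompt_learner.token_prefix", "prompt_learner.token_suffix"):
        state_dict.pop(key, None)
    model.load_state_dict(state_dict, strict=False)
    return checkpoint.get("epoch", epoch)
```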
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 642,289 +# val 25,000 +# test 25,000 +--------- -------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 
'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 
'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias5', 
'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale6'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/imagenet/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 19,602 +* accuracy: 78.4% +* error: 21.6% +* macro_f1: 78.2% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f4e3752f7707856435e0880e750815b1ca4fb0d4 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/imagenet/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed2 +RESUME: +SEED: 2 
+TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
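Each `=> result` block in these logs reduces to simple counting over the test split: accuracy is correct/total, error is its complement, and macro-F1 averages per-class F1 scores. A short sketch of that arithmetic, assuming scikit-learn's `f1_score` for the macro average (for example, the ImageNet base run above: 19,602 / 25,000 = 78.4% accuracy, 21.6% error):

```python
from sklearn.metrics import f1_score

def summarize(y_true, y_pred):
    """Reproduce the total/correct/accuracy/error/macro_f1 summary (percentages)."""
    total = len(y_true)
    correct = sum(int(t == p) for t, p in zip(y_true, y_pred))
    accuracy = 100.0 * correct / total
    macro_f1 = 100.0 * f1_score(y_true, y_pred, average="macro")
    return {"total": total, "correct": correct,
            "accuracy": accuracy, "error": 100.0 - accuracy, "macro_f1": macro_f1}
```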
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 642,289 +# val 25,000 +# test 25,000 +--------- -------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 
'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 
'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.2', 
'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale0'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/imagenet/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 19,505 +* accuracy: 78.0% +* error: 22.0% +* macro_f1: 77.8% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..a1c8457c8e4ca3eeb34feec3167d6f83fc8f4408 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/imagenet/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/imagenet/shots_16/seed3 +RESUME: +SEED: 
3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: ImageNet +num_shots is -1 +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------- +Dataset ImageNet +# classes 500 +# train_x 642,289 +# val 25,000 +# test 25,000 +--------- -------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'image_encoder.transformer.resblocks.5.VPT_shallow', 
'text_encoder.transformer.resblocks.2.VPT_scale6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'prompt_learner.ctx_space.2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_shallow', 
'text_encoder.transformer.resblocks.8.VPT_bias3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 
'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias2'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/imagenet/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 25,000 +* correct: 19,570 +* accuracy: 78.3% +* error: 21.7% +* macro_f1: 78.1% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc093111596bd6924fa0386a8e20a4354a40cfe1 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/oxford_flowers/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 1,744 +# val 1,053 +# test 1,053 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 
'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale6', 
'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 
'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.5.VPT_shallow'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/oxford_flowers/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,053 +* correct: 1,028 +* accuracy: 97.6% +* error: 2.4% +* macro_f1: 97.5% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..03a3a153cdd6c115bb68b6c081a7bb5f980d5d3c --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/oxford_flowers/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 1,744 +# val 1,053 +# test 1,053 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 
'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 
'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 
'image_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias7'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/oxford_flowers/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,053 +* correct: 1,031 +* accuracy: 97.9% +* error: 2.1% +* macro_f1: 97.9% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c3fad4ad30395afe24380eff1af98c57612ed98d --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/oxford_flowers/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_flowers/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 51 +# train_x 1,744 +# val 1,053 +# test 1,053 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 
'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'prompt_learner.ctx_space.0', 
'text_encoder.transformer.resblocks.1.VPT_scale7', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 
'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/oxford_flowers/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,053 +* correct: 1,034 +* accuracy: 98.2% +* error: 1.8% +* macro_f1: 98.0% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6a695a84284c35f38ecade61b2144edeed221c41 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/oxford_pets/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 19 +# train_x 1,508 +# val 1,881 +# test 1,881 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 
'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'prompt_learner.ctx', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale7', 
'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'prompt_learner.ctx_space.3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'prompt_learner.ctx_space.5', 
'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_shallow'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/oxford_pets/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,881 +* correct: 1,790 +* accuracy: 95.2% +* error: 4.8% +* macro_f1: 95.1% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..98b28564250c72bd3cfd705fc0ca55f59cac7d1d --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/oxford_pets/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 19 +# train_x 1,508 +# val 1,881 +# test 1,881 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_shallow', 
'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 
'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'prompt_learner.ctx_space.3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 
'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias6'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/oxford_pets/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,881 +* correct: 1,801 +* accuracy: 95.7% +* error: 4.3% +* macro_f1: 95.7% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..cde1f1e2df3502b53eeea28a96b731876257212f --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/oxford_pets/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/oxford_pets/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 19 +# train_x 1,508 +# val 1,881 +# test 1,881 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 
'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 
'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_bias4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'prompt_learner.ctx_space.1'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/oxford_pets/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,881 +* correct: 1,801 +* accuracy: 95.7% +* error: 4.3% +* macro_f1: 95.7% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b6f13f3249667daab69456b84b665da3ad30908 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/stanford_cars/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,240 +# val 4,002 +# test 4,002 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 
'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 
'text_encoder.transformer.resblocks.3.VPT_bias7', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'prompt_learner.ctx', 
'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale5'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/stanford_cars/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 4,002 +* correct: 3,183 +* accuracy: 79.5% +* error: 20.5% +* macro_f1: 79.1% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..0186b188fc528157b9ddc2a5fabae5f0ed07fe90 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/stanford_cars/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,240 +# val 4,002 +# test 4,002 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 
'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'image_encoder.transformer.resblocks.5.VPT_shallow', 
'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'prompt_learner.ctx_space.6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 
'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.7.VPT_scale7'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/stanford_cars/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 4,002 +* correct: 3,180 +* accuracy: 79.5% +* error: 20.5% +* macro_f1: 79.1% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5a683ce8dc575841d35f5f28e17a7dacd1da0be2 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/stanford_cars/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/stanford_cars/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 98 +# train_x 3,240 +# val 4,002 +# test 4,002 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_bias0', 
'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 
'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_scale4', 
'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias6'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/stanford_cars/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 4,002 +* correct: 3,181 +* accuracy: 79.5% +* error: 20.5% +* macro_f1: 79.0% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9bfa9b2db4df6a81cc5bf6adb53e4e22a776984c --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/sun397/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + 
EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 199 +# train_x 7,960 +# val 9,950 +# test 9,950 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 
'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'prompt_learner.ctx_space.0', 'image_encoder.VPT', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 
'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'prompt_learner.ctx_space.4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'prompt_learner.ctx_space.6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 
'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias6'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/sun397/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 9,950 +* correct: 8,258 +* accuracy: 83.0% +* error: 17.0% +* macro_f1: 82.8% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..437a623b79a4c0839d74858b7137bd533efa6f28 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/sun397/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + 
COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 199 +# train_x 7,960 +# val 9,950 +# test 9,950 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 
'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'prompt_learner.ctx_space.2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 
'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 
'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.transformer.resblocks.3.VPT_shallow'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/sun397/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 9,950 +* correct: 8,204 +* accuracy: 82.5% +* error: 17.5% +* macro_f1: 82.3% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..38e953be9cf5f32cfa5332a95b3d35eecfb433df --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/sun397/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/sun397/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 199 +# train_x 7,960 +# val 9,950 +# test 9,950 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 
'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 
'text_encoder.transformer.resblocks.5.VPT_scale1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 
'text_encoder.transformer.resblocks.3.VPT_bias2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/sun397/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 9,950 +* correct: 8,243 +* accuracy: 82.8% +* error: 17.2% +* macro_f1: 82.7% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed1/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..bedf7f3b48200cc8418aa410f632228079ef893c --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed1/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/ucf101/shots_16/seed1 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed1 +RESUME: 
+SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 51 +# train_x 3,926 +# val 1,934 +# test 1,934 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 
'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'prompt_learner.ctx_space.2', 
'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale7', 
'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale4'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/ucf101/shots_16/seed1/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,934 +* correct: 1,689 +* accuracy: 87.3% +* error: 12.7% +* macro_f1: 86.7% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed2/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d7b6a50227f30ced560c72fda435f5f9cbf7342a --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed2/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/ucf101/shots_16/seed2 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 51 +# train_x 3,926 +# val 1,934 +# test 1,934 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 
'text_encoder.transformer.resblocks.8.VPT_bias5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 
'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 
'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale0'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/ucf101/shots_16/seed2/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,934 +* correct: 1,676 +* accuracy: 86.7% +* error: 13.3% +* macro_f1: 85.9% diff --git a/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed3/log.txt b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ae86021e3841d4f66660df89f7e5729be3a94466 --- /dev/null +++ b/output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed3/log.txt @@ -0,0 +1,352 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep20_batch4_4+4ctx_promptsrc.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 20 +model_dir: weights/base2new/SubspacePromptSRC/ucf101/shots_16/seed3 +no_train: False +opts: ['DATASET.SUBSAMPLE_CLASSES', 'base'] +output_dir: output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SubspacePromptSRC +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 4 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: base + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.0025 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 20 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/base2new/SubspacePromptSRC/reproduce_vit_b16_ep20_batch4_4+4ctx_promptsrc/train_base/ucf101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SubspacePromptSRC + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 7 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 3 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 45 + SPACE_DIM: 8 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SubspacePromptSRC +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +SUBSAMPLE BASE CLASSES! 
+Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 51 +# train_x 3,926 +# val 1,934 +# test 1,934 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_scale7', 'transformer.resblocks.1.VPT_bias7', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_scale7', 'transformer.resblocks.2.VPT_bias7', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_scale7', 'transformer.resblocks.3.VPT_bias7', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_scale7', 
'transformer.resblocks.4.VPT_bias7', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_scale7', 'transformer.resblocks.5.VPT_bias7', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_scale7', 'transformer.resblocks.6.VPT_bias7', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_scale7', 'transformer.resblocks.7.VPT_bias7', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_scale7', 'transformer.resblocks.8.VPT_bias7', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +Independent V-L design +Initial text context: "a photo of a" +Number of context words (tokens) for Language prompting: 4 +Number of context words (tokens) for Vision prompting: 4 +build model vit is True +build model vit is True +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 
'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale7', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale7', 'text_encoder.transformer.resblocks.8.VPT_scale7', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale7', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias7', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias7', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale7', 
'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale7', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'prompt_learner.ctx_space.7', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias7', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias7', 'text_encoder.transformer.resblocks.7.VPT_scale3', 
'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias7', 'text_encoder.transformer.resblocks.4.VPT_scale1'} +Parameters count: 154 +Loading evaluator: Classification +Loading weights to VLPromptLearner from "weights/base2new/SubspacePromptSRC/ucf101/shots_16/seed3/VLPromptLearner/model.pth.tar-20" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,934 +* correct: 1,684 +* accuracy: 87.1% +* error: 12.9% +* macro_f1: 86.2% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2da7c858a2cb6d2ed38a9d6b1c663408faf3bb73 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: 
False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 100 +# train_x 4,128 +# val 2,465 +# test 2,465 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 
'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx_space.0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 2,465 +* correct: 2,314 +* accuracy: 93.9% +* error: 6.1% +* macro_f1: 91.2% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..38af37f824f3a981ca33e5e0af6f4f0261fd6560 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 
1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 100 +# train_x 4,128 +# val 2,465 +# test 2,465 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.VPT', 'prompt_learner.ctx_space.0', 
'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 2,465 +* correct: 2,315 +* accuracy: 93.9% +* error: 6.1% +* macro_f1: 91.4% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e49cfbb6b634cc7441766f01b0e23c9d38c562c0 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + 
WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/caltech101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 100 +# train_x 4,128 +# val 2,465 +# test 2,465 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 
'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.1.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 2,465 +* correct: 2,317 +* accuracy: 94.0% +* error: 6.0% +* macro_f1: 91.2% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e16ab96247e32550625eb2b30e94ab88932cb5b4 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant 
+ WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 47 +# train_x 2,820 +# val 1,692 +# test 1,692 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 
'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 1,692 +* correct: 779 +* accuracy: 46.0% +* error: 54.0% +* macro_f1: 41.2% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..4b50e5ed906d79428e590272e66112160e8b8554 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + 
WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 47 +# train_x 2,820 +# val 1,692 +# test 1,692 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx_space.1', 
'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 1,692 +* correct: 770 +* accuracy: 45.5% +* error: 54.5% +* macro_f1: 40.9% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9b49403f44443313de6c9428c31adff6c11d58fa --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 
1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/dtd/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 47 +# train_x 2,820 +# val 1,692 +# test 1,692 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.VPT', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 
'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 1,692 +* correct: 807 +* accuracy: 47.7% +* error: 52.3% +* macro_f1: 43.2% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..22636c1d1ed91e01abbeffdc09b5f22d368af8ec --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + 
WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
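The INPUT block (and the `Building transform_train` / `Building transform_test` lines printed further down in each log) describes CLIP-style preprocessing. A rough torchvision equivalent, assuming bicubic interpolation and the CLIP normalization statistics listed under PIXEL_MEAN / PIXEL_STD:

```python
from torchvision import transforms as T
from torchvision.transforms import InterpolationMode

CLIP_MEAN = (0.48145466, 0.4578275, 0.40821073)
CLIP_STD = (0.26862954, 0.26130258, 0.27577711)

# transform_train: random_resized_crop + random_flip + normalize
transform_train = T.Compose([
    T.RandomResizedCrop(224, scale=(0.08, 1.0),
                        interpolation=InterpolationMode.BICUBIC),
    T.RandomHorizontalFlip(),
    T.ToTensor(),                      # to torch tensor of range [0, 1]
    T.Normalize(CLIP_MEAN, CLIP_STD),
])

# transform_test: resize the smaller edge to 224 + 224x224 center crop + normalize
transform_test = T.Compose([
    T.Resize(224, interpolation=InterpolationMode.BICUBIC),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(CLIP_MEAN, CLIP_STD),
])
```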
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 10 +# train_x 13,500 +# val 8,100 +# test 8,100 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'image_encoder.VPT', 'prompt_learner.ctx', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.1.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 8,100 +* correct: 4,380 +* accuracy: 54.1% +* error: 45.9% +* macro_f1: 47.3% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a917a1b4b06ce3151e565b7b57468be49778fb7 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + 
WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
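Each `=> result` block in these logs reports accuracy as correct / total (e.g. 4,380 / 8,100 ≈ 54.1% for the EuroSAT seed-1 run above), error as its complement, and macro F1 as the unweighted mean of per-class F1 scores. A small sketch of that summary; the project's Classification evaluator may differ in rounding and implementation details:

```python
import numpy as np
from sklearn.metrics import f1_score

def summarize(y_true, y_pred):
    """Rough equivalent of the => result block: accuracy = correct/total,
    error = 100 - accuracy, macro_f1 = unweighted mean of per-class F1."""
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    total = y_true.size
    correct = int((y_true == y_pred).sum())
    acc = 100.0 * correct / total
    return {
        "total": total,
        "correct": correct,
        "accuracy": round(acc, 1),
        "error": round(100.0 - acc, 1),
        "macro_f1": round(100.0 * f1_score(y_true, y_pred, average="macro"), 1),
    }
```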
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 10 +# train_x 13,500 +# val 8,100 +# test 8,100 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.VPT', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 8,100 +* correct: 3,705 +* accuracy: 45.7% +* error: 54.3% +* macro_f1: 40.2% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8e1661b587f72369b205ee00df2f6f2fb7ad56ca --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: 
constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/eurosat/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 10 +# train_x 13,500 +# val 8,100 +# test 8,100 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 
'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 8,100 +* correct: 3,986 +* accuracy: 49.2% +* error: 50.8% +* macro_f1: 43.6% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..76c1c946558fb0c7dee5c98e5c9a263fcceda841 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed1/log.txt @@ -0,0 +1,339 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + 
WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
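The `Turning off gradients in both the image and the text encoder` / `Parameters to be updated: {...}` lines in each log reflect the usual prompt-tuning setup: all of CLIP stays frozen, and only the learnable context vectors (`prompt_learner.ctx`, `prompt_learner.ctx_space.*`) and the VPT tokens/scales/biases remain trainable. A hedged sketch of that filtering; the substring match below is an assumption, and the real trainer may select parameter names differently:

```python
def freeze_all_but_prompts(model, trainable_keywords=("prompt_learner", "VPT")):
    """Freeze CLIP's image and text encoders, leaving only prompt-related
    parameters (ctx vectors, ctx_space, VPT tokens/scales/biases) trainable."""
    enabled = set()
    for name, param in model.named_parameters():
        if any(key in name for key in trainable_keywords):
            param.requires_grad_(True)
            enabled.add(name)
        else:
            param.requires_grad_(False)
    print(f"Parameters to be updated: {enabled}")
    return enabled
```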
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: FGVCAircraft +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 100 +# train_x 3,334 +# val 3,333 +# test 3,333 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.1', 
'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 3,333 +* correct: 792 +* accuracy: 23.8% +* error: 76.2% +* macro_f1: 20.6% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e96bc9b6f05cf8cce341ef052618b54b04b1aefa --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed2/log.txt @@ -0,0 +1,339 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: 
constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: FGVCAircraft +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 100 +# train_x 3,334 +# val 3,333 +# test 3,333 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale1', 
'prompt_learner.ctx_space.0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 3,333 +* correct: 809 +* accuracy: 24.3% +* error: 75.7% +* macro_f1: 21.4% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..89ce92eacc88644d1c9f8239609e5e983fa4953c --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed3/log.txt @@ -0,0 +1,339 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/fgvc_aircraft/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
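Every log in this diff is an evaluation-only run (`eval_only: True`, `load_epoch: 4`): prompt weights trained on ImageNet are loaded from `model_dir`, then the target dataset's test split is scored. A minimal sketch of the load step reported as `Loading weights to SubspacePromptLearner ...`; the checkpoint layout and the token_prefix/token_suffix stripping are assumptions modelled on CoOp-style code rather than this repository's exact implementation:

```python
import os
import torch

def load_prompt_learner(prompt_learner, model_dir, epoch=4,
                        name="SubspacePromptLearner"):
    """Load a saved prompt learner for eval-only runs, ignoring keys that are
    rebuilt per dataset (e.g. tokenized class-name prefixes/suffixes)."""
    path = os.path.join(model_dir, name, f"model.pth.tar-{epoch}")
    checkpoint = torch.load(path, map_location="cpu")
    state_dict = checkpoint["state_dict"]
    # class-name dependent buffers differ across datasets, so drop them
    for key in ("token_prefix", "token_suffix"):
        state_dict.pop(key, None)
    prompt_learner.load_state_dict(state_dict, strict=False)
    print(f'Loading weights to {name} from "{path}" (epoch = {checkpoint.get("epoch", epoch)})')
```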
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: FGVCAircraft +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 100 +# train_x 3,334 +# val 3,333 +# test 3,333 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 
'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.2.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 3,333 +* correct: 837 +* accuracy: 25.1% +* error: 74.9% +* macro_f1: 22.4% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..de39cb6c0e19ac4d35f47fbf27c19e25b17334a5 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 101 +# train_x 50,500 +# val 30,300 +# test 30,300 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx', 'image_encoder.VPT', 
'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 30,300 +* correct: 26,116 +* accuracy: 86.2% +* error: 13.8% +* macro_f1: 86.2% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..593409d56703d2ec59361ebf2f9441b0fa8ba7a6 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + 
WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 101 +# train_x 50,500 +# val 30,300 +# test 30,300 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.0', 
'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 30,300 +* correct: 26,176 +* accuracy: 86.4% +* error: 13.6% +* macro_f1: 86.4% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..864cba5d89327e8f209934b58ccd8632ecec0969 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + 
WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/food101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 101 +# train_x 50,500 +# val 30,300 +# test 30,300 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 
'prompt_learner.ctx', 'prompt_learner.ctx_space.1', 'prompt_learner.ctx_space.0', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 30,300 +* correct: 26,124 +* accuracy: 86.2% +* error: 13.8% +* macro_f1: 86.2% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2e02afd9e529d19e6ce9b490066bccbd197bb3f8 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + 
WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNet +num_shots is -1 +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- --------- +Dataset ImageNet +# classes 1,000 +# train_x 1,281,123 +# val 50,000 +# test 50,000 +--------- --------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx', 'prompt_learner.ctx_space.1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 35,510 +* accuracy: 71.0% +* error: 29.0% +* macro_f1: 70.3% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e568015aeec74253ec87a45a0f7081e44f12d832 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNet +num_shots is -1 +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- --------- +Dataset ImageNet +# classes 1,000 +# train_x 1,281,123 +# val 50,000 +# test 50,000 +--------- --------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 
'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.1', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 35,491 +* accuracy: 71.0% +* error: 29.0% +* macro_f1: 70.2% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ce47f2d989c09a0cf1ae5d4517cdfe2e5ad23575 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNet +num_shots is -1 +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- --------- +Dataset ImageNet +# classes 1,000 +# train_x 1,281,123 +# val 50,000 +# test 50,000 +--------- --------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx_space.1', 'prompt_learner.ctx_space.0', 
'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 35,483 +* accuracy: 71.0% +* error: 29.0% +* macro_f1: 70.3% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7ddfc117874a45c1825bfbc54f13815d66597848 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed1/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenet_a.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNetA + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNetA +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- --------- +Dataset ImageNetA +# classes 200 +# train_x 7,500 +# test 7,500 +--------- --------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.VPT', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 
'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 7,500 +* correct: 3,839 +* accuracy: 51.2% +* error: 48.8% +* macro_f1: 46.8% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..46c5ac6c9c09b93009156bc0d536adadbf3d739e --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed2/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenet_a.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNetA + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNetA +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- --------- +Dataset ImageNetA +# classes 200 +# train_x 7,500 +# test 7,500 +--------- --------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 
'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'image_encoder.VPT', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 7,500 +* correct: 3,803 +* accuracy: 50.7% +* error: 49.3% +* macro_f1: 46.8% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..85ccfa15ca7d558fe1d69b4ed6b9ad183998608e --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed3/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenet_a.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNetA + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_a/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNetA +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- --------- +Dataset ImageNetA +# classes 200 +# train_x 7,500 +# test 7,500 +--------- --------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 
'prompt_learner.ctx', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.VPT', 'prompt_learner.ctx_space.1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 7,500 +* correct: 3,784 +* accuracy: 50.5% +* error: 49.5% +* macro_f1: 45.9% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5369e7d59ccc6d99564bbf0d21d568fa812ed1b3 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed1/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenet_r.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNetR + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNetR +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- --------- +Dataset ImageNetR +# classes 200 +# train_x 30,000 +# test 30,000 +--------- --------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx_space.1', 
'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'prompt_learner.ctx'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 30,000 +* correct: 23,474 +* accuracy: 78.2% +* error: 21.8% +* macro_f1: 75.9% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..db2c01a3836e33523f72dd54cb283c68056120ca --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed2/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenet_r.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNetR + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNetR +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- --------- +Dataset ImageNetR +# classes 200 +# train_x 30,000 +# test 30,000 +--------- --------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx', 'prompt_learner.ctx_space.1', 'prompt_learner.ctx_space.0', 
'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 30,000 +* correct: 23,478 +* accuracy: 78.3% +* error: 21.7% +* macro_f1: 75.9% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd2f74aac494c10ca5158d590e8404b2759c527 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed3/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenet_r.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNetR + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_r/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNetR +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- --------- +Dataset ImageNetR +# classes 200 +# train_x 30,000 +# test 30,000 +--------- --------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 
'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 30,000 +* correct: 23,457 +* accuracy: 78.2% +* error: 21.8% +* macro_f1: 75.9% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8a28d626b6989da0fd622a4cffe309b523289947 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed1/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenet_sketch.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNetSketch + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNetSketch +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------------- +Dataset ImageNetSketch +# classes 1,000 +# train_x 50,889 +# test 50,889 +--------- -------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 
'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 50,889 +* correct: 25,367 +* accuracy: 49.8% +* error: 50.2% +* macro_f1: 47.9% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..4749767cab974497caf8646412cc02dd132e9846 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed2/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenet_sketch.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNetSketch + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + 
WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNetSketch +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------------- +Dataset ImageNetSketch +# classes 1,000 +# train_x 50,889 +# test 50,889 +--------- -------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 
'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 50,889 +* correct: 25,220 +* accuracy: 49.6% +* error: 50.4% +* macro_f1: 47.5% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d3d88cdd8c4edcd32eea4158fd0816ec0818cac8 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed3/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenet_sketch.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNetSketch + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + 
WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenet_sketch/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNetSketch +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- -------------- +Dataset ImageNetSketch +# classes 1,000 +# train_x 50,889 +# test 50,889 +--------- -------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 
'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.VPT', 'prompt_learner.ctx', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 50,889 +* correct: 25,223 +* accuracy: 49.6% +* error: 50.4% +* macro_f1: 47.7% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..49ae7b454f40e1a57c28609976baf05398bf9c4d --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed1/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenetv2.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNetV2 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 
0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNetV2 +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset ImageNetV2 +# classes 1,000 +# train_x 10,000 +# test 10,000 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx', 
'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.VPT', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.2.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 10,000 +* correct: 6,443 +* accuracy: 64.4% +* error: 35.6% +* macro_f1: 63.5% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5eebca0f7ff2553eb62ada8d64a0aab0315e2cb5 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed2/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenetv2.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNetV2 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNetV2 +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset ImageNetV2 +# classes 1,000 +# train_x 10,000 +# test 10,000 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.VPT'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 10,000 +* correct: 6,450 +* accuracy: 64.5% +* error: 35.5% +* macro_f1: 63.6% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..62e635eda1ce506bf5b5e3551d39f2ff4f0c3d24 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed3/log.txt @@ -0,0 +1,338 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/imagenetv2.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNetV2 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/imagenetv2/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNetV2 +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset ImageNetV2 +# classes 1,000 +# train_x 10,000 +# test 10,000 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 
'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 10,000 +* correct: 6,430 +* accuracy: 64.3% +* error: 35.7% +* macro_f1: 63.4% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..05b0afe139e499ea013c9ca49a2bcedf64343afb --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 102 +# train_x 4,093 +# val 2,463 +# test 2,463 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 
'text_encoder.transformer.resblocks.2.VPT_scale1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 2,463 +* correct: 1,733 +* accuracy: 70.4% +* error: 29.6% +* macro_f1: 64.4% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..376f5d4caa0e6463978a33dc2d9124bcf856082e --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + 
WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 102 +# train_x 4,093 +# val 2,463 +# test 2,463 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 
'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 2,463 +* correct: 1,745 +* accuracy: 70.8% +* error: 29.2% +* macro_f1: 64.7% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c0721f96f5ef99e1991c4db112fd0a670373331b --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + 
WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_flowers/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 102 +# train_x 4,093 +# val 2,463 +# test 2,463 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 2,463 +* correct: 1,740 +* accuracy: 70.6% +* error: 29.4% +* macro_f1: 64.8% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b87445c146932492276e3b86c783c4b0a3f9522c --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + 
WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 37 +# train_x 2,944 +# val 3,669 +# test 3,669 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 
'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 3,669 +* correct: 3,295 +* accuracy: 89.8% +* error: 10.2% +* macro_f1: 88.4% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4410bf24c14dd8984b2394b1b61dd0689efa644 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + 
WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 37 +# train_x 2,944 +# val 3,669 +# test 3,669 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx', 
'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 3,669 +* correct: 3,292 +* accuracy: 89.7% +* error: 10.3% +* macro_f1: 88.4% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..fe5bc1b3be4e653f46cb58adc712ac496c6b1ce0 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + 
WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/oxford_pets/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 37 +# train_x 2,944 +# val 3,669 +# test 3,669 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 
'prompt_learner.ctx_space.1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 3,669 +* correct: 3,294 +* accuracy: 89.8% +* error: 10.2% +* macro_f1: 88.3% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5556e35f12bff1f7ae35c6b741e84b2524f0057f --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 
1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 196 +# train_x 6,509 +# val 8,041 +# test 8,041 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 
'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.transformer.resblocks.1.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 8,041 +* correct: 5,341 +* accuracy: 66.4% +* error: 33.6% +* macro_f1: 65.0% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..50a2e47607b3dc8ad51c466c3071be4833950cb9 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + 
WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 196 +# train_x 6,509 +# val 8,041 +# test 8,041 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 
'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 8,041 +* correct: 5,347 +* accuracy: 66.5% +* error: 33.5% +* macro_f1: 64.8% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b2dc49206660b4e082abe880737d52b1924f0e72 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + 
WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/stanford_cars/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 196 +# train_x 6,509 +# val 8,041 +# test 8,041 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.VPT', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_bias0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 8,041 +* correct: 5,300 +* accuracy: 65.9% +* error: 34.1% +* macro_f1: 64.5% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..683a270d9a9589545cf1866e12825cbc316f015e --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: 
constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 397 +# train_x 15,880 +# val 19,850 +# test 19,850 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx_space.1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 19,850 +* correct: 13,444 +* accuracy: 67.7% +* error: 32.3% +* macro_f1: 66.3% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c084c01c64c6e0de8db4da93cf03cf762cd3ec8b --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 
0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 397 +# train_x 15,880 +# val 19,850 +# test 19,850 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.1', 'image_encoder.VPT'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 19,850 +* correct: 13,425 +* accuracy: 67.6% +* error: 32.4% +* macro_f1: 66.2% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8694ae6031c81b62fed9c1262e72e445f8ecf5ef --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 
0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/sun397/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 397 +# train_x 15,880 +# val 19,850 +# test 19,850 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 
'prompt_learner.ctx_space.1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 19,850 +* correct: 13,390 +* accuracy: 67.5% +* error: 32.5% +* macro_f1: 66.2% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed1/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2a473a2af6e072dc5eb833a80c037605a7bedcc5 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + 
WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 101 +# train_x 7,639 +# val 3,783 +# test 3,783 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx_space.0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_shallow', 
'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed1/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 3,783 +* correct: 2,588 +* accuracy: 68.4% +* error: 31.6% +* macro_f1: 65.9% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed2/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8025179f6bd20e22a91998558653d1216856b353 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 
0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 101 +# train_x 7,639 +# val 3,783 +# test 3,783 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale0', 
'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.1.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed2/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 3,783 +* correct: 2,564 +* accuracy: 67.8% +* error: 32.2% +* macro_f1: 65.3% diff --git a/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed3/log.txt b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..518a95b80138e28b0b7c2d4998f5382ef8f787b8 --- /dev/null +++ b/output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep12_batch8_4+4ctx_cross_datasets.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: 4 +model_dir: weights/cross_dg/imagenet/shots_16/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 10 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 
0.0005 +OUTPUT_DIR: output/cross_dg/SuPr/reproduce_vit_b16_ep12_batch8_4+4ctx_cross_datasets/ucf101/shots_16/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: last_step + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 1 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.5 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 3 + PROMPT_DEPTH_VISION: 3 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 2 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 101 +# train_x 7,639 +# val 3,783 +# test 3,783 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 
'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/cross_dg/imagenet/shots_16/seed3/SubspacePromptLearner/model.pth.tar-4" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 3,783 +* correct: 2,628 +* accuracy: 69.5% +* error: 30.5% +* macro_f1: 66.9% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed1/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d3f7df34b434ebb38955b79b000e434eec9b83df --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/caltech101/shots_4/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 
+OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 100 +# train_x 4,128 +# val 2,465 +# test 2,465 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 
'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 
'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 
'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 
'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias2'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/caltech101/shots_4/seed1/SubspacePromptLearner/model-best.pth.tar" (epoch = 22) +Evaluate on the *test* set +=> result +* total: 2,465 +* correct: 2,351 +* accuracy: 95.4% +* error: 4.6% +* macro_f1: 93.2% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed2/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ed071f6987a96f805b3725d2ee39fdb78cbc1cbf --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/caltech101/shots_4/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed2 
+RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 100 +# train_x 4,128 +# val 2,465 +# test 2,465 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 
'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 
'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 
'text_encoder.transformer.resblocks.2.VPT_bias6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/caltech101/shots_4/seed2/SubspacePromptLearner/model-best.pth.tar" (epoch = 14) +Evaluate on the *test* set +=> result +* total: 2,465 +* correct: 2,362 +* accuracy: 95.8% +* error: 4.2% +* macro_f1: 93.8% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed3/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..761cfa0459f751a261b6eb4a06ddf6e603391399 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/caltech101.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/caltech101/shots_4/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Caltech101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/caltech101/shots_4/seed3 
+RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Caltech101 +Reading split from /mnt/sdb/data/datasets/caltech-101/split_zhou_Caltech101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset Caltech101 +# classes 100 +# train_x 4,128 +# val 2,465 +# test 2,465 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 
'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 
'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 
'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 
'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale4'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/caltech101/shots_4/seed3/SubspacePromptLearner/model-best.pth.tar" (epoch = 11) +Evaluate on the *test* set +=> result +* total: 2,465 +* correct: 2,351 +* accuracy: 95.4% +* error: 4.6% +* macro_f1: 92.9% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed1/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..a0c5adc1869687655a6f34183341174db63c28fb --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/dtd/shots_4/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False 
+ EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 47 +# train_x 2,820 +# val 1,692 +# test 1,692 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 
'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 
'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 
'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.4', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 
'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/dtd/shots_4/seed1/SubspacePromptLearner/model-best.pth.tar" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 1,692 +* correct: 1,092 +* accuracy: 64.5% +* error: 35.5% +* macro_f1: 64.3% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed2/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e5bbe6688d1cf18931be58f7231aff2cc6a45425 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/dtd/shots_4/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: 
False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 47 +# train_x 2,820 +# val 1,692 +# test 1,692 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 
'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 
'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale6', 
'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale1', 
'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/dtd/shots_4/seed2/SubspacePromptLearner/model-best.pth.tar" (epoch = 11) +Evaluate on the *test* set +=> result +* total: 1,692 +* correct: 1,103 +* accuracy: 65.2% +* error: 34.8% +* macro_f1: 65.2% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed3/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..fb14a73ba6c76bbbfbe761e5bf79e5cd9bce0616 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/dtd.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/dtd/shots_4/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: DescribableTextures + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/dtd/shots_4/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: DescribableTextures +Reading split from /mnt/sdb/data/datasets/dtd/split_zhou_DescribableTextures.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------------- +Dataset DescribableTextures +# classes 47 +# train_x 2,820 +# val 1,692 +# test 1,692 +--------- ------------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 
'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 
'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 
'text_encoder.transformer.resblocks.7.VPT_bias1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 
'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/dtd/shots_4/seed3/SubspacePromptLearner/model-best.pth.tar" (epoch = 12) +Evaluate on the *test* set +=> result +* total: 1,692 +* correct: 1,083 +* accuracy: 64.0% +* error: 36.0% +* macro_f1: 63.1% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed1/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..11e1d8060f5f3bed93d3b8720cd949ec6ea38f18 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/eurosat/shots_4/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 10 +# train_x 13,500 +# val 8,100 +# test 8,100 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 
'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 
'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_scale0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'image_encoder.VPT', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 
'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 
'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/eurosat/shots_4/seed1/SubspacePromptLearner/model-best.pth.tar" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 8,100 +* correct: 7,161 +* accuracy: 88.4% +* error: 11.6% +* macro_f1: 88.2% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed2/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..26e4b308b3fc4bdc383d48c893c80b255d0a45b1 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/eurosat/shots_4/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + 
FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 10 +# train_x 13,500 +# val 8,100 +# test 8,100 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 
'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 
'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 
'text_encoder.transformer.resblocks.8.VPT_scale3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 
'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/eurosat/shots_4/seed2/SubspacePromptLearner/model-best.pth.tar" (epoch = 20) +Evaluate on the *test* set +=> result +* total: 8,100 +* correct: 6,969 +* accuracy: 86.0% +* error: 14.0% +* macro_f1: 85.7% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed3/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9398614e13a50652f9647d9e26f46e2084c28af4 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/eurosat.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/eurosat/shots_4/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: EuroSAT + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/eurosat/shots_4/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + 
FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: EuroSAT +Reading split from /mnt/sdb/data/datasets/eurosat/split_zhou_EuroSAT.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset EuroSAT +# classes 10 +# train_x 13,500 +# val 8,100 +# test 8,100 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 
'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 
'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 
'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 
'text_encoder.transformer.resblocks.6.VPT_bias2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/eurosat/shots_4/seed3/SubspacePromptLearner/model-best.pth.tar" (epoch = 22) +Evaluate on the *test* set +=> result +* total: 8,100 +* correct: 7,265 +* accuracy: 89.7% +* error: 10.3% +* macro_f1: 89.5% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed1/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2de4f6459a1ac957d366224ee5b54873a6aa85e0 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed1/log.txt @@ -0,0 +1,339 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/fgvc_aircraft/shots_4/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + 
FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: FGVCAircraft +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 100 +# train_x 3,334 +# val 3,333 +# test 3,333 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 
'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 
'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 
'text_encoder.transformer.resblocks.4.VPT_scale5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 
'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_scale6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/fgvc_aircraft/shots_4/seed1/SubspacePromptLearner/model-best.pth.tar" (epoch = 22) +Evaluate on the *test* set +=> result +* total: 3,333 +* correct: 1,304 +* accuracy: 39.1% +* error: 60.9% +* macro_f1: 37.6% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed2/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..af90842b84e4158536265523d367128b175cfd97 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed2/log.txt @@ -0,0 +1,339 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/fgvc_aircraft/shots_4/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False 
+ EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: FGVCAircraft +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 100 +# train_x 3,334 +# val 3,333 +# test 3,333 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 
'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 
'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 
'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 
'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/fgvc_aircraft/shots_4/seed2/SubspacePromptLearner/model-best.pth.tar" (epoch = 23) +Evaluate on the *test* set +=> result +* total: 3,333 +* correct: 1,303 +* accuracy: 39.1% +* error: 60.9% +* macro_f1: 37.4% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed3/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b7c6a87ab1b0c456e9044377cba5906836f94eb5 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed3/log.txt @@ -0,0 +1,339 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/fgvc_aircraft.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/fgvc_aircraft/shots_4/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: FGVCAircraft + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/fgvc_aircraft/shots_4/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + 
FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: FGVCAircraft +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset FGVCAircraft +# classes 100 +# train_x 3,334 +# val 3,333 +# test 3,333 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 
'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 
'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'prompt_learner.ctx', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 
'text_encoder.transformer.resblocks.6.VPT_scale5', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 
'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/fgvc_aircraft/shots_4/seed3/SubspacePromptLearner/model-best.pth.tar" (epoch = 25) +Evaluate on the *test* set +=> result +* total: 3,333 +* correct: 1,247 +* accuracy: 37.4% +* error: 62.6% +* macro_f1: 35.9% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed1/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9290b249d5086d7a87d674a23884bd040e16812a --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/food101/shots_4/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + 
PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 101 +# train_x 50,500 +# val 30,300 +# test 30,300 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 
'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 
'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 
'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'prompt_learner.ctx_space.6', 'prompt_learner.ctx_space.2', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 
'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/food101/shots_4/seed1/SubspacePromptLearner/model-best.pth.tar" (epoch = 3) +Evaluate on the *test* set +=> result +* total: 30,300 +* correct: 26,297 +* accuracy: 86.8% +* error: 13.2% +* macro_f1: 86.7% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed2/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7d9fafd1fcb0649399ca48865a47c5073c8d8ecf --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/food101/shots_4/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + 
FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 101 +# train_x 50,500 +# val 30,300 +# test 30,300 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 
'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 
'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 
'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 
'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.5'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/food101/shots_4/seed2/SubspacePromptLearner/model-best.pth.tar" (epoch = 4) +Evaluate on the *test* set +=> result +* total: 30,300 +* correct: 26,358 +* accuracy: 87.0% +* error: 13.0% +* macro_f1: 87.0% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed3/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..049751aea282f9beb3c80132918e9264d0fa55c2 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/food101.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/food101/shots_4/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: Food101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/food101/shots_4/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val 
+ NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: Food101 +Reading split from /mnt/sdb/data/datasets/food-101/split_zhou_Food101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------- +Dataset Food101 +# classes 101 +# train_x 50,500 +# val 30,300 +# test 30,300 +--------- ------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 
'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 
'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 
'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'prompt_learner.ctx_space.6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 
'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias5'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/food101/shots_4/seed3/SubspacePromptLearner/model-best.pth.tar" (epoch = 9) +Evaluate on the *test* set +=> result +* total: 30,300 +* correct: 26,410 +* accuracy: 87.2% +* error: 12.8% +* macro_f1: 87.2% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed1/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..464ef218809e1dee155bebb2c3095870c42a6c2d --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/imagenet/shots_4/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: 
Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNet +num_shots is -1 +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- --------- +Dataset ImageNet +# classes 1,000 +# train_x 1,281,123 +# val 50,000 +# test 50,000 +--------- --------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 
'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 
'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 
'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 
'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/imagenet/shots_4/seed1/SubspacePromptLearner/model-best.pth.tar" (epoch = 12) +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 35,788 +* accuracy: 71.6% +* error: 28.4% +* macro_f1: 71.0% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed2/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d07a7bf33fee7b2725c3acdabc6871ae226511be --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/imagenet/shots_4/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + 
PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNet +num_shots is -1 +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- --------- +Dataset ImageNet +# classes 1,000 +# train_x 1,281,123 +# val 50,000 +# test 50,000 +--------- --------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 
'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 
'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'prompt_learner.ctx_space.4', 
'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 
'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/imagenet/shots_4/seed2/SubspacePromptLearner/model-best.pth.tar" (epoch = 15) +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 35,800 +* accuracy: 71.6% +* error: 28.4% +* macro_f1: 71.0% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed3/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f72f3a592bc6873ae78cd7fe44ac9125cea0493f --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/imagenet.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/imagenet/shots_4/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: ImageNet + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/imagenet/shots_4/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + 
PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: ImageNet +num_shots is -1 +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- --------- +Dataset ImageNet +# classes 1,000 +# train_x 1,281,123 +# val 50,000 +# test 50,000 +--------- --------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 
'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 
'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 
'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 
'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/imagenet/shots_4/seed3/SubspacePromptLearner/model-best.pth.tar" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 50,000 +* correct: 35,683 +* accuracy: 71.4% +* error: 28.6% +* macro_f1: 70.9% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed1/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6af0e1260239739ef70105089bac0d13936aab23 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/oxford_flowers/shots_4/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed1 +RESUME: +SEED: 1 
+TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 102 +# train_x 4,093 +# val 2,463 +# test 2,463 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 
'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 
'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'prompt_learner.ctx_space.1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias3', 
'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'prompt_learner.ctx_space.5', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 
'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/oxford_flowers/shots_4/seed1/SubspacePromptLearner/model-best.pth.tar" (epoch = 19) +Evaluate on the *test* set +=> result +* total: 2,463 +* correct: 2,325 +* accuracy: 94.4% +* error: 5.6% +* macro_f1: 93.7% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed2/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..36a5db580b89def05980268efa4f95487a64e9ca --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/oxford_flowers/shots_4/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 102 +# train_x 4,093 +# val 2,463 +# test 2,463 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 
'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 
'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_bias1', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 
'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'prompt_learner.ctx', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 
'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale4'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/oxford_flowers/shots_4/seed2/SubspacePromptLearner/model-best.pth.tar" (epoch = 13) +Evaluate on the *test* set +=> result +* total: 2,463 +* correct: 2,303 +* accuracy: 93.5% +* error: 6.5% +* macro_f1: 92.8% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed3/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..fed0b109996fe2182de02171c8b8be73fac1cbc3 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/oxford_flowers.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/oxford_flowers/shots_4/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordFlowers + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_flowers/shots_4/seed3 +RESUME: +SEED: 3 
+TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
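
The OPTIM block dumped above implies a simple learning-rate curve: one warmup epoch at the constant WARMUP_CONS_LR of 1e-05, then cosine decay of the base LR 0.005 over MAX_EPOCH = 25. The snippet below is a minimal, self-contained sketch of that curve for readers skimming the config; `lr_at_epoch` is a hypothetical helper written for illustration, not Dassl's scheduler, and the exact way the real implementation splices warmup into the cosine curve may differ.

```python
# Illustrative sketch (not part of the logged run): the LR schedule implied by
# the OPTIM config above -- a constant warmup epoch followed by cosine decay.
import math

def lr_at_epoch(epoch, base_lr=0.005, max_epoch=25,
                warmup_epoch=1, warmup_cons_lr=1e-5):
    """Return the learning rate used during `epoch` (0-indexed)."""
    if epoch < warmup_epoch:
        return warmup_cons_lr  # constant warmup (WARMUP_TYPE: constant)
    # cosine annealing of base_lr towards 0 over the full schedule
    return 0.5 * base_lr * (1 + math.cos(math.pi * epoch / max_epoch))

if __name__ == "__main__":
    for e in range(25):
        print(f"epoch {e:2d}: lr = {lr_at_epoch(e):.6f}")
```
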
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordFlowers +Reading split from /mnt/sdb/data/datasets/oxford_flowers/split_zhou_OxfordFlowers.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------- +Dataset OxfordFlowers +# classes 102 +# train_x 4,093 +# val 2,463 +# test 2,463 +--------- ------------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 
'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 
'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_bias4', 
'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 
'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/oxford_flowers/shots_4/seed3/SubspacePromptLearner/model-best.pth.tar" (epoch = 19) +Evaluate on the *test* set +=> result +* total: 2,463 +* correct: 2,326 +* accuracy: 94.4% +* error: 5.6% +* macro_f1: 94.0% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed1/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4a42f6a8b5d2ca5d747e228f8ced785dd091034 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/oxford_pets/shots_4/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: 
False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
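
The "Arguments" header of each log lists a `config_file` plus an `opts` list (here `['TRAINER.SUPR.SVD', 'False']`), and the "Config" dump that follows reflects both. The sketch below shows, with the real yacs `CfgNode` API, how such a dump is typically produced: defaults, then the YAML file, then the opts list applied last. The default keys shown are a tiny hypothetical subset chosen for illustration, not the full Dassl defaults.

```python
# Minimal sketch of how an Arguments block becomes a Config dump: a yacs
# CfgNode is merged first with the YAML config_file, then with the opts list.
from yacs.config import CfgNode as CN

cfg = CN()
cfg.TRAINER = CN()
cfg.TRAINER.SUPR = CN()
cfg.TRAINER.SUPR.SVD = True        # hypothetical default value
cfg.TRAINER.SUPR.SPACE_DIM = 7

# In the real runs the trainer YAML would be merged here, e.g.:
# cfg.merge_from_file("configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml")

# The opts list from the log overrides everything merged before it:
cfg.merge_from_list(["TRAINER.SUPR.SVD", "False"])
cfg.freeze()
print(cfg.TRAINER.SUPR.SVD)  # -> False, matching "SVD: False" in the dump
```
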
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 37 +# train_x 2,944 +# val 3,669 +# test 3,669 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 
'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 
'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 
'image_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 
'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias4'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/oxford_pets/shots_4/seed1/SubspacePromptLearner/model-best.pth.tar" (epoch = 16) +Evaluate on the *test* set +=> result +* total: 3,669 +* correct: 3,439 +* accuracy: 93.7% +* error: 6.3% +* macro_f1: 93.7% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed2/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7182ef420f5ade4c4158f20c2194727f0b06a444 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/oxford_pets/shots_4/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False 
+ EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
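
Each "=> result" block in these logs reports total, correct, accuracy, error, and macro_f1. The snippet below is an illustrative re-implementation of those quantities for reference; it is not the Classification evaluator used by the runs, and the tiny label lists at the bottom are made-up inputs for the usage example.

```python
# Sketch of the metrics printed in the "=> result" blocks: accuracy/error plus
# macro F1 (unweighted mean of per-class F1 scores).
from collections import defaultdict

def summarize(y_true, y_pred):
    total = len(y_true)
    correct = sum(t == p for t, p in zip(y_true, y_pred))
    acc = 100.0 * correct / total

    tp, fp, fn = defaultdict(int), defaultdict(int), defaultdict(int)
    for t, p in zip(y_true, y_pred):
        if t == p:
            tp[t] += 1
        else:
            fp[p] += 1
            fn[t] += 1
    f1s = []
    for c in set(y_true):
        prec = tp[c] / (tp[c] + fp[c]) if (tp[c] + fp[c]) else 0.0
        rec = tp[c] / (tp[c] + fn[c]) if (tp[c] + fn[c]) else 0.0
        f1s.append(2 * prec * rec / (prec + rec) if (prec + rec) else 0.0)
    macro_f1 = 100.0 * sum(f1s) / len(f1s)

    return {"total": total, "correct": correct, "accuracy": acc,
            "error": 100.0 - acc, "macro_f1": macro_f1}

print(summarize([0, 0, 1, 2, 2, 2], [0, 1, 1, 2, 2, 0]))
```
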
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 37 +# train_x 2,944 +# val 3,669 +# test 3,669 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 
'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 
'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'prompt_learner.ctx', 'prompt_learner.ctx_space.0', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 
'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 
'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_scale6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/oxford_pets/shots_4/seed2/SubspacePromptLearner/model-best.pth.tar" (epoch = 17) +Evaluate on the *test* set +=> result +* total: 3,669 +* correct: 3,445 +* accuracy: 93.9% +* error: 6.1% +* macro_f1: 93.9% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed3/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..812fafee13dbe8a83dd497030b35cc979153445c --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/oxford_pets.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/oxford_pets/shots_4/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: OxfordPets + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/oxford_pets/shots_4/seed3 +RESUME: +SEED: 3 
+TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: OxfordPets +Reading split from /mnt/sdb/data/datasets/oxford_pets/split_zhou_OxfordPets.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ---------- +Dataset OxfordPets +# classes 37 +# train_x 2,944 +# val 3,669 +# test 3,669 +--------- ---------- +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 
'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 
'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'prompt_learner.ctx_space.1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 
'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 
'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'prompt_learner.ctx_space.4', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/oxford_pets/shots_4/seed3/SubspacePromptLearner/model-best.pth.tar" (epoch = 24) +Evaluate on the *test* set +=> result +* total: 3,669 +* correct: 3,450 +* accuracy: 94.0% +* error: 6.0% +* macro_f1: 94.0% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed1/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5e5eae279fb09624541db15f6bc434ae6b854f17 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/stanford_cars/shots_4/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed1 
+RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 196 +# train_x 6,509 +# val 8,041 +# test 8,041 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 
'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 
'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 
'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 
'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.6.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/stanford_cars/shots_4/seed1/SubspacePromptLearner/model-best.pth.tar" (epoch = 23) +Evaluate on the *test* set +=> result +* total: 8,041 +* correct: 6,133 +* accuracy: 76.3% +* error: 23.7% +* macro_f1: 75.8% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed2/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..eb224a0b5788529aeddbb198bd76545dc735cb15 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/stanford_cars/shots_4/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 196 +# train_x 6,509 +# val 8,041 +# test 8,041 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 
'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 
'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 
'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 
'text_encoder.transformer.resblocks.5.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_shallow'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/stanford_cars/shots_4/seed2/SubspacePromptLearner/model-best.pth.tar" (epoch = 24) +Evaluate on the *test* set +=> result +* total: 8,041 +* correct: 6,072 +* accuracy: 75.5% +* error: 24.5% +* macro_f1: 75.1% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed3/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7c09432c9127e2f2784bc985298f4f47d9382b72 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/stanford_cars.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/stanford_cars/shots_4/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: StanfordCars + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: 
output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/stanford_cars/shots_4/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
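For reference, the `opts` list recorded in the arguments above (e.g. `['TRAINER.SUPR.SVD', 'False']`) is the standard yacs key/value override mechanism: the pairs are merged into the config after the YAML files, so they take precedence over the values in `vit_b16_ep25_batch8_4+4ctx_few_shot.yaml`. A minimal sketch of how such an override is usually applied; the `get_default_cfg` helper below is a hypothetical stand-in, not the project's actual config builder:

```python
from yacs.config import CfgNode as CN

def get_default_cfg():
    # Hypothetical stand-in for the project's default config builder;
    # only the node needed for this example is defined.
    cfg = CN()
    cfg.TRAINER = CN()
    cfg.TRAINER.SUPR = CN()
    cfg.TRAINER.SUPR.SVD = True  # placeholder default before overrides
    return cfg

cfg = get_default_cfg()
# YAML files would be merged first, e.g.:
# cfg.merge_from_file("configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml")
# Command-line opts are applied last and win over the YAML values.
cfg.merge_from_list(["TRAINER.SUPR.SVD", "False"])
assert cfg.TRAINER.SUPR.SVD is False
```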
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: StanfordCars +Reading split from /mnt/sdb/data/datasets/stanford_cars/split_zhou_StanfordCars.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------------ +Dataset StanfordCars +# classes 196 +# train_x 6,509 +# val 8,041 +# test 8,041 +--------- ------------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 
'transformer.resblocks.1.VPT_bias6', 'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 
'transformer.resblocks.8.VPT_scale1', 'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.4.VPT_bias6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 
'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_shallow', 
'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias0'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/stanford_cars/shots_4/seed3/SubspacePromptLearner/model-best.pth.tar" (epoch = 25) +Evaluate on the *test* set +=> result +* total: 8,041 +* correct: 6,186 +* accuracy: 76.9% +* error: 23.1% +* macro_f1: 76.3% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed1/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c52233c01e44598e69e509a86e7c7bf24adca8e4 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/sun397/shots_4/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + 
FINAL_MODEL: best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
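The INPUT block above and the "Building transform_test" lines logged in each run (resize the smaller edge to 224 with bicubic interpolation, 224x224 center crop, conversion to a [0, 1] tensor, normalization with the CLIP mean/std) correspond to a standard torchvision pipeline. A minimal sketch; the repo's own transform builder may differ in details, so treat this as an approximation of the logged steps rather than the exact implementation:

```python
from torchvision import transforms
from torchvision.transforms import InterpolationMode

# CLIP normalization statistics, as recorded under INPUT.PIXEL_MEAN / PIXEL_STD.
pixel_mean = [0.48145466, 0.4578275, 0.40821073]
pixel_std = [0.26862954, 0.26130258, 0.27577711]

transform_test = transforms.Compose([
    # An int size resizes the smaller edge, matching "resize the smaller edge to 224".
    transforms.Resize(224, interpolation=InterpolationMode.BICUBIC),
    transforms.CenterCrop(224),
    transforms.ToTensor(),  # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=pixel_mean, std=pixel_std),
])
```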
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 397 +# train_x 15,880 +# val 19,850 +# test 19,850 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 
'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 
'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'prompt_learner.ctx_space.2', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_bias2', 
'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'prompt_learner.ctx', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 
'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias3'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/sun397/shots_4/seed1/SubspacePromptLearner/model-best.pth.tar" (epoch = 10) +Evaluate on the *test* set +=> result +* total: 19,850 +* correct: 14,662 +* accuracy: 73.9% +* error: 26.1% +* macro_f1: 73.4% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed2/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8297fb88cd794c86a1961ff4b246930b440bb747 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/sun397/shots_4/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: 
best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
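The "=> result" blocks in these logs report accuracy as 100 * correct / total, error as its complement, and macro_f1 as the unweighted mean of per-class F1 scores (e.g. 14,662 / 19,850 ≈ 73.9% for SUN397 seed 1). A short sketch of that arithmetic; the repo's `Classification` evaluator may differ in minor details such as the label set passed to `f1_score`:

```python
from sklearn.metrics import f1_score

def summarize(y_true, y_pred):
    """Reproduce the accuracy / error / macro_f1 summary printed in these logs."""
    total = len(y_true)
    correct = sum(int(t == p) for t, p in zip(y_true, y_pred))
    accuracy = 100.0 * correct / total
    macro_f1 = 100.0 * f1_score(y_true, y_pred, average="macro")
    return {
        "total": total,
        "correct": correct,
        "accuracy": accuracy,      # e.g. 14,662 / 19,850 ≈ 73.9%
        "error": 100.0 - accuracy,
        "macro_f1": macro_f1,
    }
```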
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 397 +# train_x 15,880 +# val 19,850 +# test 19,850 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 
'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 
'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias2', 
'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'prompt_learner.ctx_space.0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'prompt_learner.ctx_space.6', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'prompt_learner.ctx_space.2', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale2', 
'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/sun397/shots_4/seed2/SubspacePromptLearner/model-best.pth.tar" (epoch = 15) +Evaluate on the *test* set +=> result +* total: 19,850 +* correct: 14,676 +* accuracy: 73.9% +* error: 26.1% +* macro_f1: 73.6% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed3/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..280299023b3796e59c75237f20fdebad85e5e3d0 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/sun397.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/sun397/shots_4/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: SUN397 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/sun397/shots_4/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: 
best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: SUN397 +Reading split from /mnt/sdb/data/datasets/sun397/split_zhou_SUN397.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset SUN397 +# classes 397 +# train_x 15,880 +# val 19,850 +# test 19,850 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 
'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 
'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'prompt_learner.ctx_space.6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'prompt_learner.ctx_space.3', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_scale3', 
'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_shallow', 
'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias1'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/sun397/shots_4/seed3/SubspacePromptLearner/model-best.pth.tar" (epoch = 8) +Evaluate on the *test* set +=> result +* total: 19,850 +* correct: 14,637 +* accuracy: 73.7% +* error: 26.3% +* macro_f1: 73.3% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed1/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed1/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..325a62b68577baaf3aa407300363247551285680 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed1/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/ucf101/shots_4/seed1 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed1 +resume: +root: /mnt/sdb/data/datasets +seed: 1 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed1 +RESUME: +SEED: 1 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: 
best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 101 +# train_x 7,639 +# val 3,783 +# test 3,783 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 
'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 
'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.6.VPT_scale1', 
'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'prompt_learner.ctx_space.1', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias5', 
'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_bias2'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/ucf101/shots_4/seed1/SubspacePromptLearner/model-best.pth.tar" (epoch = 15) +Evaluate on the *test* set +=> result +* total: 3,783 +* correct: 3,072 +* accuracy: 81.2% +* error: 18.8% +* macro_f1: 80.1% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed2/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed2/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..49a168b45a12af0e8111bff32043be2f03f7c2b0 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed2/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/ucf101/shots_4/seed2 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed2 +resume: +root: /mnt/sdb/data/datasets +seed: 2 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed2 +RESUME: +SEED: 2 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: best_val + NO_TEST: 
False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 101 +# train_x 7,639 +# val 3,783 +# test 3,783 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 
'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 
'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.6.VPT_bias3', 'image_encoder.VPT', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'prompt_learner.ctx_space.5', 'prompt_learner.ctx_space.6', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_bias6', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 
'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 
'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale6'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/ucf101/shots_4/seed2/SubspacePromptLearner/model-best.pth.tar" (epoch = 18) +Evaluate on the *test* set +=> result +* total: 3,783 +* correct: 3,083 +* accuracy: 81.5% +* error: 18.5% +* macro_f1: 80.6% diff --git a/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed3/log.txt b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed3/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..a64362ea0802a85820c87b8a6f5315390c8bfa44 --- /dev/null +++ b/output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed3/log.txt @@ -0,0 +1,340 @@ +*************** +** Arguments ** +*************** +backbone: +config_file: configs/trainers/SuPr/vit_b16_ep25_batch8_4+4ctx_few_shot.yaml +dataset_config_file: configs/datasets/ucf101.yaml +eval_only: True +head: +load_epoch: None +model_dir: weights/fewshot/ucf101/shots_4/seed3 +no_train: False +opts: ['TRAINER.SUPR.SVD', 'False'] +output_dir: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed3 +resume: +root: /mnt/sdb/data/datasets +seed: 3 +source_domains: None +target_domains: None +trainer: SuPr +transforms: None +************ +** Config ** +************ +DATALOADER: + K_TRANSFORMS: 1 + NUM_WORKERS: 8 + RETURN_IMG0: False + TEST: + BATCH_SIZE: 100 + SAMPLER: SequentialSampler + TRAIN_U: + BATCH_SIZE: 32 + N_DOMAIN: 0 + N_INS: 16 + SAME_AS_X: True + SAMPLER: RandomSampler + TRAIN_X: + BATCH_SIZE: 8 + N_DOMAIN: 0 + N_INS: 16 + SAMPLER: RandomSampler +DATASET: + ALL_AS_UNLABELED: False + CIFAR_C_LEVEL: 1 + CIFAR_C_TYPE: + NAME: UCF101 + NUM_LABELED: -1 + NUM_SHOTS: -1 + ROOT: /mnt/sdb/data/datasets + SOURCE_DOMAINS: () + STL10_FOLD: -1 + SUBSAMPLE_CLASSES: all + TARGET_DOMAINS: () + VAL_PERCENT: 0.1 +INPUT: + COLORJITTER_B: 0.4 + COLORJITTER_C: 0.4 + COLORJITTER_H: 0.1 + COLORJITTER_S: 0.4 + CROP_PADDING: 4 + CUTOUT_LEN: 16 + CUTOUT_N: 1 + GB_K: 21 + GB_P: 0.5 + GN_MEAN: 0.0 + GN_STD: 0.15 + INTERPOLATION: bicubic + NO_TRANSFORM: False + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + RANDAUGMENT_M: 10 + RANDAUGMENT_N: 2 + RGS_P: 0.2 + RRCROP_SCALE: (0.08, 1.0) + SIZE: (224, 224) + TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize') +MODEL: + BACKBONE: + NAME: ViT-B/16 + PRETRAINED: True + HEAD: + ACTIVATION: relu + BN: True + DROPOUT: 0.0 + HIDDEN_LAYERS: () + NAME: + INIT_WEIGHTS: +OPTIM: + ADAM_BETA1: 0.9 + ADAM_BETA2: 0.999 + BASE_LR_MULT: 0.1 + GAMMA: 0.1 + LR: 0.005 + LR_EXP: 6.5 + LR_SCHEDULER: cosine + MAX_EPOCH: 25 + MOMENTUM: 0.9 + NAME: sgd + NEW_LAYERS: ['linear_probe', 'film'] + RMSPROP_ALPHA: 0.99 + SGD_DAMPNING: 0 + SGD_NESTEROV: False + STAGED_LR: False + STEPSIZE: (-1,) + WARMUP_CONS_LR: 1e-05 + WARMUP_EPOCH: 1 + WARMUP_MIN_LR: 1e-05 + WARMUP_RECOUNT: True + WARMUP_TYPE: constant + WEIGHT_DECAY: 0.0005 +OUTPUT_DIR: output/fewshot/SuPr/reproduce_vit_b16_ep25_batch8_4+4ctx_few_shot/ucf101/shots_4/seed3 +RESUME: +SEED: 3 +TEST: + COMPUTE_CMAT: False + EVALUATOR: Classification + FINAL_MODEL: 
best_val + NO_TEST: False + PER_CLASS_RESULT: False + SPLIT: test +TRAIN: + CHECKPOINT_FREQ: 0 + COUNT_ITER: train_x + PRINT_FREQ: 20 +TRAINER: + CDAC: + CLASS_LR_MULTI: 10 + P_THRESH: 0.95 + RAMPUP_COEF: 30 + RAMPUP_ITRS: 1000 + STRONG_TRANSFORMS: () + TOPK_MATCH: 5 + COCOOP: + CTX_INIT: + N_CTX: 16 + PREC: fp16 + COOP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 16 + PREC: fp16 + W: 8.0 + CROSSGRAD: + ALPHA_D: 0.5 + ALPHA_F: 0.5 + EPS_D: 1.0 + EPS_F: 1.0 + DAEL: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DAELDG: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 0.5 + DDAIG: + ALPHA: 0.5 + CLAMP: False + CLAMP_MAX: 1.0 + CLAMP_MIN: -1.0 + G_ARCH: + LMDA: 0.3 + WARMUP: 0 + DOMAINMIX: + ALPHA: 1.0 + BETA: 1.0 + TYPE: crossdomain + ENTMIN: + LMDA: 0.001 + FILM: + LINEAR_PROBE: True + FIXMATCH: + CONF_THRE: 0.95 + STRONG_TRANSFORMS: () + WEIGHT_U: 1.0 + IVLP: + CTX_INIT: a photo of a + N_CTX_TEXT: 2 + N_CTX_VISION: 2 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + LINEAR_PROBE: + TEST_TIME_FUSION: True + TYPE: linear + WEIGHT: 0.3 + M3SDA: + LMDA: 0.5 + N_STEP_F: 4 + MAPLE: + CTX_INIT: a photo of a + N_CTX: 2 + PREC: fp16 + PROMPT_DEPTH: 9 + MCD: + N_STEP_F: 4 + MEANTEACHER: + EMA_ALPHA: 0.999 + RAMPUP: 5 + WEIGHT_U: 1.0 + MIXMATCH: + MIXUP_BETA: 0.75 + RAMPUP: 20000 + TEMP: 2.0 + WEIGHT_U: 100.0 + MME: + LMDA: 0.1 + NAME: SuPr + PROMPTSRC: + CTX_INIT: a photo of a + GPA_MEAN: 15 + GPA_STD: 1 + IMAGE_LOSS_WEIGHT: 10 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + TEXT_LOSS_WEIGHT: 25 + SE: + CONF_THRE: 0.95 + EMA_ALPHA: 0.999 + RAMPUP: 300 + SUPR: + CTX_INIT: a photo of a + ENSEMBLE_NUM: 1 + HARD_PROMPT_PATH: configs/trainers/SuPr/hard_prompts/ + LAMBDA: 0.7 + N_CTX_TEXT: 4 + N_CTX_VISION: 4 + PREC: fp16 + PROMPT_DEPTH_TEXT: 9 + PROMPT_DEPTH_VISION: 9 + REG_LOSS_WEIGHT: 60 + SPACE_DIM: 7 + SVD: False + TRAINER_BACKBONE: SuPr + TCP: + CLASS_TOKEN_POSITION: end + CSC: False + CTX_INIT: + N_CTX: 4 + PREC: fp16 + W: 1.0 +USE_CUDA: True +VERBOSE: True +VERSION: 1 +Collecting env info ... 
+** System info ** +PyTorch version: 1.10.2+cu113 +Is debug build: False +CUDA used to build PyTorch: 11.3 +ROCM used to build PyTorch: N/A + +OS: Ubuntu 22.04.5 LTS (x86_64) +GCC version: (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 +Clang version: Could not collect +CMake version: Could not collect +Libc version: glibc-2.35 + +Python version: 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0] (64-bit runtime) +Python platform: Linux-6.8.0-51-generic-x86_64-with-glibc2.10 +Is CUDA available: True +CUDA runtime version: 11.5.119 +GPU models and configuration: +GPU 0: NVIDIA GeForce RTX 4090 +GPU 1: NVIDIA GeForce RTX 4090 +GPU 2: NVIDIA GeForce RTX 4090 +GPU 3: NVIDIA GeForce RTX 4090 +GPU 4: NVIDIA GeForce RTX 4090 +GPU 5: NVIDIA GeForce RTX 4090 +GPU 6: NVIDIA GeForce RTX 4090 +GPU 7: NVIDIA GeForce RTX 4090 + +Nvidia driver version: 550.90.07 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A + +Versions of relevant libraries: +[pip3] numpy==1.24.3 +[pip3] open-clip-torch==2.20.0 +[pip3] torch==1.10.2+cu113 +[pip3] torchaudio==0.10.2+cu113 +[pip3] torchvision==0.11.3+cu113 +[conda] blas 1.0 mkl https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free +[conda] cudatoolkit 10.2.89 h713d32c_11 https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge +[conda] ffmpeg 4.3 hf484d3e_0 pytorch +[conda] mkl 2023.1.0 h6d00ec8_46342 +[conda] mkl-service 2.4.0 py38h5eee18b_1 +[conda] mkl_fft 1.3.6 py38h417a72b_1 +[conda] mkl_random 1.2.2 py38h417a72b_1 +[conda] numpy 1.24.3 py38hf6e8229_1 +[conda] numpy-base 1.24.3 py38h060ed82_1 +[conda] open-clip-torch 2.20.0 pypi_0 pypi +[conda] pytorch-mutex 1.0 cpu pytorch +[conda] torch 1.10.2+cu113 pypi_0 pypi +[conda] torchaudio 0.10.2+cu113 pypi_0 pypi +[conda] torchvision 0.11.3+cu113 pypi_0 pypi + Pillow (9.4.0) + +Loading trainer: SuPr +Loading dataset: UCF101 +Reading split from /mnt/sdb/data/datasets/ucf101/split_zhou_UCF101.json +Building transform_train ++ random resized crop (size=(224, 224), scale=(0.08, 1.0)) ++ random flip ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +Building transform_test ++ resize the smaller edge to 224 ++ 224x224 center crop ++ to torch tensor of range [0, 1] ++ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]) +--------- ------ +Dataset UCF101 +# classes 101 +# train_x 7,639 +# val 3,783 +# test 3,783 +--------- ------ +Loading CLIP (backbone: ViT-B/16) +build model vit is True +Weights not found for some missing keys: ['visual.VPT', 'visual.transformer.resblocks.1.VPT_shallow', 'visual.transformer.resblocks.2.VPT_shallow', 'visual.transformer.resblocks.3.VPT_shallow', 'visual.transformer.resblocks.4.VPT_shallow', 'visual.transformer.resblocks.5.VPT_shallow', 'visual.transformer.resblocks.6.VPT_shallow', 'visual.transformer.resblocks.7.VPT_shallow', 'visual.transformer.resblocks.8.VPT_shallow', 'transformer.resblocks.1.VPT_scale0', 'transformer.resblocks.1.VPT_bias0', 'transformer.resblocks.1.VPT_scale1', 'transformer.resblocks.1.VPT_bias1', 'transformer.resblocks.1.VPT_scale2', 'transformer.resblocks.1.VPT_bias2', 'transformer.resblocks.1.VPT_scale3', 'transformer.resblocks.1.VPT_bias3', 'transformer.resblocks.1.VPT_scale4', 'transformer.resblocks.1.VPT_bias4', 'transformer.resblocks.1.VPT_scale5', 'transformer.resblocks.1.VPT_bias5', 'transformer.resblocks.1.VPT_scale6', 'transformer.resblocks.1.VPT_bias6', 
'transformer.resblocks.1.VPT_shallow', 'transformer.resblocks.2.VPT_scale0', 'transformer.resblocks.2.VPT_bias0', 'transformer.resblocks.2.VPT_scale1', 'transformer.resblocks.2.VPT_bias1', 'transformer.resblocks.2.VPT_scale2', 'transformer.resblocks.2.VPT_bias2', 'transformer.resblocks.2.VPT_scale3', 'transformer.resblocks.2.VPT_bias3', 'transformer.resblocks.2.VPT_scale4', 'transformer.resblocks.2.VPT_bias4', 'transformer.resblocks.2.VPT_scale5', 'transformer.resblocks.2.VPT_bias5', 'transformer.resblocks.2.VPT_scale6', 'transformer.resblocks.2.VPT_bias6', 'transformer.resblocks.2.VPT_shallow', 'transformer.resblocks.3.VPT_scale0', 'transformer.resblocks.3.VPT_bias0', 'transformer.resblocks.3.VPT_scale1', 'transformer.resblocks.3.VPT_bias1', 'transformer.resblocks.3.VPT_scale2', 'transformer.resblocks.3.VPT_bias2', 'transformer.resblocks.3.VPT_scale3', 'transformer.resblocks.3.VPT_bias3', 'transformer.resblocks.3.VPT_scale4', 'transformer.resblocks.3.VPT_bias4', 'transformer.resblocks.3.VPT_scale5', 'transformer.resblocks.3.VPT_bias5', 'transformer.resblocks.3.VPT_scale6', 'transformer.resblocks.3.VPT_bias6', 'transformer.resblocks.3.VPT_shallow', 'transformer.resblocks.4.VPT_scale0', 'transformer.resblocks.4.VPT_bias0', 'transformer.resblocks.4.VPT_scale1', 'transformer.resblocks.4.VPT_bias1', 'transformer.resblocks.4.VPT_scale2', 'transformer.resblocks.4.VPT_bias2', 'transformer.resblocks.4.VPT_scale3', 'transformer.resblocks.4.VPT_bias3', 'transformer.resblocks.4.VPT_scale4', 'transformer.resblocks.4.VPT_bias4', 'transformer.resblocks.4.VPT_scale5', 'transformer.resblocks.4.VPT_bias5', 'transformer.resblocks.4.VPT_scale6', 'transformer.resblocks.4.VPT_bias6', 'transformer.resblocks.4.VPT_shallow', 'transformer.resblocks.5.VPT_scale0', 'transformer.resblocks.5.VPT_bias0', 'transformer.resblocks.5.VPT_scale1', 'transformer.resblocks.5.VPT_bias1', 'transformer.resblocks.5.VPT_scale2', 'transformer.resblocks.5.VPT_bias2', 'transformer.resblocks.5.VPT_scale3', 'transformer.resblocks.5.VPT_bias3', 'transformer.resblocks.5.VPT_scale4', 'transformer.resblocks.5.VPT_bias4', 'transformer.resblocks.5.VPT_scale5', 'transformer.resblocks.5.VPT_bias5', 'transformer.resblocks.5.VPT_scale6', 'transformer.resblocks.5.VPT_bias6', 'transformer.resblocks.5.VPT_shallow', 'transformer.resblocks.6.VPT_scale0', 'transformer.resblocks.6.VPT_bias0', 'transformer.resblocks.6.VPT_scale1', 'transformer.resblocks.6.VPT_bias1', 'transformer.resblocks.6.VPT_scale2', 'transformer.resblocks.6.VPT_bias2', 'transformer.resblocks.6.VPT_scale3', 'transformer.resblocks.6.VPT_bias3', 'transformer.resblocks.6.VPT_scale4', 'transformer.resblocks.6.VPT_bias4', 'transformer.resblocks.6.VPT_scale5', 'transformer.resblocks.6.VPT_bias5', 'transformer.resblocks.6.VPT_scale6', 'transformer.resblocks.6.VPT_bias6', 'transformer.resblocks.6.VPT_shallow', 'transformer.resblocks.7.VPT_scale0', 'transformer.resblocks.7.VPT_bias0', 'transformer.resblocks.7.VPT_scale1', 'transformer.resblocks.7.VPT_bias1', 'transformer.resblocks.7.VPT_scale2', 'transformer.resblocks.7.VPT_bias2', 'transformer.resblocks.7.VPT_scale3', 'transformer.resblocks.7.VPT_bias3', 'transformer.resblocks.7.VPT_scale4', 'transformer.resblocks.7.VPT_bias4', 'transformer.resblocks.7.VPT_scale5', 'transformer.resblocks.7.VPT_bias5', 'transformer.resblocks.7.VPT_scale6', 'transformer.resblocks.7.VPT_bias6', 'transformer.resblocks.7.VPT_shallow', 'transformer.resblocks.8.VPT_scale0', 'transformer.resblocks.8.VPT_bias0', 'transformer.resblocks.8.VPT_scale1', 
'transformer.resblocks.8.VPT_bias1', 'transformer.resblocks.8.VPT_scale2', 'transformer.resblocks.8.VPT_bias2', 'transformer.resblocks.8.VPT_scale3', 'transformer.resblocks.8.VPT_bias3', 'transformer.resblocks.8.VPT_scale4', 'transformer.resblocks.8.VPT_bias4', 'transformer.resblocks.8.VPT_scale5', 'transformer.resblocks.8.VPT_bias5', 'transformer.resblocks.8.VPT_scale6', 'transformer.resblocks.8.VPT_bias6', 'transformer.resblocks.8.VPT_shallow'] +Building custom CLIP +Initial context: "a photo of a" +Number of context words (tokens): 4 +build model vit is True +Turning off gradients in both the image and the text encoder +Parameters to be updated: {'text_encoder.transformer.resblocks.6.VPT_scale2', 'text_encoder.transformer.resblocks.3.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale1', 'text_encoder.transformer.resblocks.3.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale1', 'text_encoder.transformer.resblocks.2.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias6', 'image_encoder.transformer.resblocks.1.VPT_shallow', 'image_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_scale1', 'prompt_learner.ctx_space.1', 'text_encoder.transformer.resblocks.2.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_scale6', 'text_encoder.transformer.resblocks.4.VPT_bias3', 'text_encoder.transformer.resblocks.8.VPT_bias4', 'text_encoder.transformer.resblocks.1.VPT_bias0', 'text_encoder.transformer.resblocks.4.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias6', 'text_encoder.transformer.resblocks.3.VPT_bias1', 'text_encoder.transformer.resblocks.4.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale6', 'prompt_learner.ctx_space.6', 'prompt_learner.ctx_space.5', 'text_encoder.transformer.resblocks.5.VPT_bias1', 'text_encoder.transformer.resblocks.3.VPT_scale2', 'text_encoder.transformer.resblocks.8.VPT_scale1', 'text_encoder.transformer.resblocks.1.VPT_scale1', 'image_encoder.VPT', 'image_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_bias4', 'text_encoder.transformer.resblocks.8.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale4', 'prompt_learner.ctx', 'text_encoder.transformer.resblocks.8.VPT_scale5', 'text_encoder.transformer.resblocks.3.VPT_bias5', 'text_encoder.transformer.resblocks.8.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_scale3', 'text_encoder.transformer.resblocks.5.VPT_scale3', 'text_encoder.transformer.resblocks.6.VPT_bias2', 'prompt_learner.ctx_space.0', 'text_encoder.transformer.resblocks.1.VPT_bias3', 'image_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_scale1', 'text_encoder.transformer.resblocks.8.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale6', 'prompt_learner.ctx_space.4', 'text_encoder.transformer.resblocks.7.VPT_bias2', 'text_encoder.transformer.resblocks.5.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_shallow', 'text_encoder.transformer.resblocks.4.VPT_scale3', 'text_encoder.transformer.resblocks.3.VPT_scale5', 
'text_encoder.transformer.resblocks.5.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_bias4', 'text_encoder.transformer.resblocks.4.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_bias1', 'text_encoder.transformer.resblocks.2.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_scale2', 'text_encoder.transformer.resblocks.5.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias3', 'text_encoder.transformer.resblocks.2.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_bias6', 'text_encoder.transformer.resblocks.7.VPT_bias5', 'text_encoder.transformer.resblocks.3.VPT_bias3', 'text_encoder.transformer.resblocks.6.VPT_scale5', 'text_encoder.transformer.resblocks.8.VPT_bias1', 'image_encoder.transformer.resblocks.3.VPT_shallow', 'text_encoder.transformer.resblocks.5.VPT_scale6', 'text_encoder.transformer.resblocks.2.VPT_bias4', 'text_encoder.transformer.resblocks.3.VPT_shallow', 'image_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias5', 'text_encoder.transformer.resblocks.7.VPT_scale0', 'text_encoder.transformer.resblocks.4.VPT_bias1', 'text_encoder.transformer.resblocks.1.VPT_scale2', 'text_encoder.transformer.resblocks.4.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_bias3', 'text_encoder.transformer.resblocks.4.VPT_bias2', 'text_encoder.transformer.resblocks.4.VPT_scale6', 'image_encoder.transformer.resblocks.4.VPT_shallow', 'text_encoder.transformer.resblocks.2.VPT_scale6', 'text_encoder.transformer.resblocks.3.VPT_scale4', 'text_encoder.transformer.resblocks.4.VPT_scale0', 'text_encoder.transformer.resblocks.5.VPT_scale0', 'text_encoder.transformer.resblocks.1.VPT_bias1', 'image_encoder.transformer.resblocks.7.VPT_shallow', 'text_encoder.transformer.resblocks.1.VPT_bias2', 'text_encoder.transformer.resblocks.6.VPT_bias6', 'text_encoder.transformer.resblocks.1.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale0', 'text_encoder.transformer.resblocks.8.VPT_scale3', 'text_encoder.transformer.resblocks.1.VPT_bias4', 'text_encoder.transformer.resblocks.2.VPT_bias0', 'text_encoder.transformer.resblocks.1.VPT_scale5', 'text_encoder.transformer.resblocks.4.VPT_scale2', 'text_encoder.transformer.resblocks.7.VPT_bias1', 'text_encoder.transformer.resblocks.5.VPT_bias5', 'text_encoder.transformer.resblocks.2.VPT_shallow', 'text_encoder.transformer.resblocks.6.VPT_bias0', 'text_encoder.transformer.resblocks.3.VPT_scale6', 'text_encoder.transformer.resblocks.5.VPT_bias0', 'text_encoder.transformer.resblocks.7.VPT_scale4', 'text_encoder.transformer.resblocks.2.VPT_scale5', 'text_encoder.transformer.resblocks.5.VPT_scale1', 'text_encoder.transformer.resblocks.5.VPT_shallow', 'text_encoder.transformer.resblocks.8.VPT_bias5', 'prompt_learner.ctx_space.3', 'text_encoder.transformer.resblocks.5.VPT_scale5', 'text_encoder.transformer.resblocks.6.VPT_scale3', 'text_encoder.transformer.resblocks.7.VPT_bias0', 'text_encoder.transformer.resblocks.8.VPT_scale4', 'text_encoder.transformer.resblocks.5.VPT_scale4', 'text_encoder.transformer.resblocks.6.VPT_scale4', 'text_encoder.transformer.resblocks.1.VPT_scale3', 'text_encoder.transformer.resblocks.2.VPT_bias3', 'text_encoder.transformer.resblocks.1.VPT_bias6', 
'text_encoder.transformer.resblocks.2.VPT_bias2', 'text_encoder.transformer.resblocks.7.VPT_bias4', 'text_encoder.transformer.resblocks.7.VPT_shallow', 'prompt_learner.ctx_space.2', 'text_encoder.transformer.resblocks.6.VPT_bias1', 'text_encoder.transformer.resblocks.8.VPT_scale2', 'text_encoder.transformer.resblocks.6.VPT_bias3'} +Loading evaluator: Classification +Loading weights to SubspacePromptLearner from "weights/fewshot/ucf101/shots_4/seed3/SubspacePromptLearner/model-best.pth.tar" (epoch = 19) +Evaluate on the *test* set +=> result +* total: 3,783 +* correct: 3,063 +* accuracy: 81.0% +* error: 19.0% +* macro_f1: 79.7% diff --git a/parse_results/.DS_Store b/parse_results/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..59dfc08314b784de4f59757cc38c715fefe2fe6d Binary files /dev/null and b/parse_results/.DS_Store differ diff --git a/parse_results/supr/form_results_base2new.xlsx b/parse_results/supr/form_results_base2new.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..17fcccb12e2c1534007f1201e195b45309be36a1 Binary files /dev/null and b/parse_results/supr/form_results_base2new.xlsx differ diff --git a/parse_results/supr/form_results_cross.xlsx b/parse_results/supr/form_results_cross.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..7fc4e1594f0219c39257580db81a3b69aa95d282 Binary files /dev/null and b/parse_results/supr/form_results_cross.xlsx differ diff --git a/parse_results/supr/form_results_dg.xlsx b/parse_results/supr/form_results_dg.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..abd98c67aa379489a21415daa5391a843d2d1f08 Binary files /dev/null and b/parse_results/supr/form_results_dg.xlsx differ diff --git a/parse_results/supr/form_results_fewshot.xlsx b/parse_results/supr/form_results_fewshot.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e453aaba0587a60f3bb063c392bfb29ce6322be0 Binary files /dev/null and b/parse_results/supr/form_results_fewshot.xlsx differ diff --git a/parse_results/supr_ens/form_results_base2new.xlsx b/parse_results/supr_ens/form_results_base2new.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..428b229b005e4d5a167044299108b7e4fdf44b55 Binary files /dev/null and b/parse_results/supr_ens/form_results_base2new.xlsx differ diff --git a/parse_results/supr_promptsrc/form_results_base2new.xlsx b/parse_results/supr_promptsrc/form_results_base2new.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5fe443bc402db618b712153f6c06bde493b4946a Binary files /dev/null and b/parse_results/supr_promptsrc/form_results_base2new.xlsx differ diff --git a/parse_test_res.py b/parse_test_res.py new file mode 100644 index 0000000000000000000000000000000000000000..6ba5cef9b0ce2adb117e58bb46d041316d4245ba --- /dev/null +++ b/parse_test_res.py @@ -0,0 +1,334 @@ +""" +Goal +--- +1. Read test results from log.txt files +2. Compute mean and std across different folders (seeds) +3. Compute all datasets' accuracy and h-mean +4. 
Save the results to an Excel file +Usage +--- +Assume the output files are saved under output/my_experiment, +which contains results of different seeds, e.g., + +my_experiment/ + seed1/ + log.txt + seed2/ + log.txt + seed3/ + log.txt + +Run the following command from the root directory: + +$ python tools/parse_test_res.py output/my_experiment + +Add --ci95 to the argument if you wanna get 95% confidence +interval instead of standard deviation: + +$ python tools/parse_test_res.py output/my_experiment --ci95 + +If my_experiment/ has the following structure, + +my_experiment/ + exp-1/ + seed1/ + log.txt + ... + seed2/ + log.txt + ... + seed3/ + log.txt + ... + exp-2/ + ... + exp-3/ + ... + +Run + +$ python tools/parse_test_res.py output/my_experiment --multi-exp +""" +import re +import numpy as np +import os.path as osp +import argparse +import pandas as pd +from collections import OrderedDict, defaultdict + +from dassl.utils import check_isfile, listdir_nohidden + + +b2n_dataset = [ + "imagenet", + "caltech101", + "fgvc_aircraft", + "oxford_flowers", + "dtd", + "eurosat", + "food101", + "oxford_pets", + "stanford_cars", + "sun397", + "ucf101", + ] +cross_dataset = [ + "caltech101", + "fgvc_aircraft", + "oxford_flowers", + "dtd", + "eurosat", + "food101", + "oxford_pets", + "stanford_cars", + "sun397", + "ucf101", + ] +dg_dataset = [ + "imagenet", + "imagenetv2", + "imagenet_sketch", + "imagenet_a", + "imagenet_r", + ] +def compute_ci95(res): + return 1.96 * np.std(res) / np.sqrt(len(res)) + + +def parse_function(*metrics, directory="", args=None, end_signal=None): + print(f"Parsing files in {directory}") + output_results = OrderedDict() + output_results['accuracy'] = 0.0 + + try: + subdirs = listdir_nohidden(directory, sort=True) + except: + print("no folder") + return output_results + + # subdirs = [directory] + outputs = [] + + for subdir in subdirs: + fpath = osp.join(directory, subdir, "log.txt") + # fpath = osp.join(directory, "log.txt") + assert check_isfile(fpath) + good_to_go = False + output = OrderedDict() + + with open(fpath, "r") as f: + lines = f.readlines() + + for line in lines: + line = line.strip() + + if line == end_signal: + good_to_go = True + + + for metric in metrics: + match = metric["regex"].search(line) + if match and good_to_go: + if "file" not in output: + output["file"] = fpath + num = float(match.group(1)) + name = metric["name"] + output[name] = num + + if output: + outputs.append(output) + + + + if len(outputs) <= 0: + print("Nothing found in :") + print(directory) + return output_results + + metrics_results = defaultdict(list) + + for output in outputs: + msg = "" + for key, value in output.items(): + if isinstance(value, float): + msg += f"{key}: {value:.2f}%. " + else: + msg += f"{key}: {value}. 
" + if key != "file": + metrics_results[key].append(value) + print(msg) + + + + print("===") + print(f"Summary of directory: {directory}") + for key, values in metrics_results.items(): + avg = np.mean(values) + std = compute_ci95(values) if args.ci95 else np.std(values) + print(f"* {key}: {avg:.2f}% +- {std:.2f}%") + output_results[key] = avg + print("===") + + return output_results + + +def main(args, end_signal): + metric = { + "name": args.keyword, + "regex": re.compile(fr"\* {args.keyword}: ([\.\deE+-]+)%"), + } + + if args.type == "base2new": + all_dataset = b2n_dataset + final_results = defaultdict(list) + final_results1 = defaultdict(list) + pattern = r'\b(' + '|'.join(map(re.escape, all_dataset)) + r')\b' + # 替换匹配到的单词为 '{}' + p=args.directory + path_str = re.sub(pattern, "{}", p) + all_dic = [path_str.format(dataset)for dataset in all_dataset] + + all_dic1 = [] + if "train_base" in all_dic[0]: + for p in all_dic: + + all_dic1.append(p.replace("train_base", "test_new")) + + elif "test_new" in all_dic[0]: + for p in all_dic: + + + all_dic1.append(p.replace("test_new", "train_base")) + + temp = all_dic + all_dic = all_dic1 + all_dic1= temp + + for i, directory in enumerate(all_dic): + results = parse_function( + metric, directory=directory, args=args, end_signal=end_signal + ) + for key, value in results.items(): + final_results[key].append(value) + + for i, directory in enumerate(all_dic1): + results1 = parse_function( + metric, directory=directory, args=args, end_signal=end_signal + ) + for key, value in results1.items(): + final_results1[key].append(value) + + + output_data = [] + for i in range(len(all_dataset)): + base = final_results['accuracy'][i] + new = final_results1['accuracy'][i] + try: + h = 2 / (1/base + 1/new) + except: + h = 0 + result = { + 'Dataset': all_dataset[i], + 'Base Accuracy': base, + 'New Accuracy': new, + 'H-Mean': h + } + output_data.append(result) + print(f"{all_dataset[i]:<20}: base: {base:>6.2f} new: {new:>6.2f} h: {h:>6.2f}") + + output_df = pd.DataFrame(output_data) + + # 将结果保存到 Excel + output_file = "form_results_base2new.xlsx" + output_df.to_excel(output_file, index=False) + + + print("Average performance:") + + for key, values in final_results.items(): + avg_base = np.mean(values) + print('base') + print(f"* {key}: {avg_base:.2f}%") + + for key, values in final_results1.items(): + avg_new = np.mean(values) + print('new') + print(f"* {key}: {avg_new:.2f}%") + + try: + avg_h = 2 / (1/avg_base + 1/avg_new) + except: + avg_h = 0 + print(f'h: {avg_h:.2f}%') + else: + if args.type == "fewshot": + all_dataset = b2n_dataset + elif args.type == "cross": + all_dataset = cross_dataset + elif args.type == "dg": + all_dataset = dg_dataset + + final_results = defaultdict(list) + pattern = r'\b(' + '|'.join(map(re.escape, all_dataset)) + r')\b' + p=args.directory + path_str = re.sub(pattern, "{}", p) + all_dic = [path_str.format(dataset)for dataset in all_dataset] + + + for i, directory in enumerate(all_dic): + results = parse_function( + metric, directory=directory, args=args, end_signal=end_signal + ) + for key, value in results.items(): + final_results[key].append(value) + + output_data = [] + for i in range(len(all_dataset)): + base = final_results['accuracy'][i] + + result = { + 'Dataset': all_dataset[i], + 'Accuracy': base, + } + output_data.append(result) + print(f"{all_dataset[i]:<20}: Accuracy: {base:>6.2f}") + + output_df = pd.DataFrame(output_data) + + # 将结果保存到 Excel + output_file = "form_results_"+args.type+".xlsx" + output_df.to_excel(output_file, 
index=False) + + + print("Average performance:") + + for key, values in final_results.items(): + avg_base = np.mean(values) + print(f"* {key}: {avg_base:.2f}%") + + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("directory", type=str, help="path to directory") + parser.add_argument("-type", type=str, + choices=['base2new', 'fewshot', 'cross', 'dg'], # validate the task type argument + help="task type: base2new, fewshot, cross, dg") + parser.add_argument( + "--ci95", action="store_true", help=r"compute 95\% confidence interval" + ) + parser.add_argument("--test-log", action="store_true", help="parse test-only logs") + parser.add_argument( + "--multi-exp", action="store_true", help="parse multiple experiments" + ) + parser.add_argument( + "--keyword", default="accuracy", type=str, help="which keyword to extract" + ) + args = parser.parse_args() + + end_signal = "=> result" + if args.test_log: + end_signal = "=> result" + + main(args, end_signal) + diff --git a/reproduce.sh b/reproduce.sh new file mode 100644 index 0000000000000000000000000000000000000000..fdcf277ca710739d632f56849cd09280994152be --- /dev/null +++ b/reproduce.sh @@ -0,0 +1,71 @@ +#base2new +bash scripts/supr/reproduce_base2novel_setting.sh eurosat +bash scripts/supr/reproduce_base2novel_setting.sh oxford_pets +bash scripts/supr/reproduce_base2novel_setting.sh dtd +bash scripts/supr/reproduce_base2novel_setting.sh fgvc_aircraft +bash scripts/supr/reproduce_base2novel_setting.sh oxford_flowers +bash scripts/supr/reproduce_base2novel_setting.sh ucf101 +bash scripts/supr/reproduce_base2novel_setting.sh caltech101 +bash scripts/supr/reproduce_base2novel_setting.sh food101 +bash scripts/supr/reproduce_base2novel_setting.sh stanford_cars +bash scripts/supr/reproduce_base2novel_setting.sh sun397 +bash scripts/supr/reproduce_base2novel_setting.sh imagenet + +# cross_dataset +bash scripts/supr/reproduce_xd.sh eurosat +bash scripts/supr/reproduce_xd.sh oxford_pets +bash scripts/supr/reproduce_xd.sh dtd +bash scripts/supr/reproduce_xd.sh fgvc_aircraft +bash scripts/supr/reproduce_xd.sh oxford_flowers +bash scripts/supr/reproduce_xd.sh ucf101 +bash scripts/supr/reproduce_xd.sh caltech101 +bash scripts/supr/reproduce_xd.sh food101 +bash scripts/supr/reproduce_xd.sh stanford_cars +bash scripts/supr/reproduce_xd.sh sun397 +bash scripts/supr/reproduce_xd.sh imagenet + +# domain_generalization +bash scripts/supr/reproduce_xd.sh imagenetv2 +bash scripts/supr/reproduce_xd.sh imagenet_sketch +bash scripts/supr/reproduce_xd.sh imagenet_a +bash scripts/supr/reproduce_xd.sh imagenet_r + +# fewshot +bash scripts/supr/reproduce_fewshot.sh eurosat +bash scripts/supr/reproduce_fewshot.sh oxford_pets +bash scripts/supr/reproduce_fewshot.sh dtd +bash scripts/supr/reproduce_fewshot.sh fgvc_aircraft +bash scripts/supr/reproduce_fewshot.sh oxford_flowers +bash scripts/supr/reproduce_fewshot.sh ucf101 +bash scripts/supr/reproduce_fewshot.sh caltech101 +bash scripts/supr/reproduce_fewshot.sh food101 +bash scripts/supr/reproduce_fewshot.sh stanford_cars +bash scripts/supr/reproduce_fewshot.sh sun397 +bash scripts/supr/reproduce_fewshot.sh imagenet + + +#SuPrEns base2new +bash scripts/supr_ens/reproduce_base2novel_setting.sh eurosat +bash scripts/supr_ens/reproduce_base2novel_setting.sh oxford_pets +bash scripts/supr_ens/reproduce_base2novel_setting.sh dtd +bash scripts/supr_ens/reproduce_base2novel_setting.sh fgvc_aircraft +bash scripts/supr_ens/reproduce_base2novel_setting.sh oxford_flowers +bash scripts/supr_ens/reproduce_base2novel_setting.sh 
ucf101 +bash scripts/supr_ens/reproduce_base2novel_setting.sh caltech101 +bash scripts/supr_ens/reproduce_base2novel_setting.sh food101 +bash scripts/supr_ens/reproduce_base2novel_setting.sh stanford_cars +bash scripts/supr_ens/reproduce_base2novel_setting.sh sun397 +bash scripts/supr_ens/reproduce_base2novel_setting.sh imagenet + +#SuPr+PromptSRC base2new +bash scripts/supr_src/reproduce_base2novel_setting.sh eurosat +bash scripts/supr_src/reproduce_base2novel_setting.sh oxford_pets +bash scripts/supr_src/reproduce_base2novel_setting.sh dtd +bash scripts/supr_src/reproduce_base2novel_setting.sh fgvc_aircraft +bash scripts/supr_src/reproduce_base2novel_setting.sh oxford_flowers +bash scripts/supr_src/reproduce_base2novel_setting.sh ucf101 +bash scripts/supr_src/reproduce_base2novel_setting.sh caltech101 +bash scripts/supr_src/reproduce_base2novel_setting.sh food101 +bash scripts/supr_src/reproduce_base2novel_setting.sh stanford_cars +bash scripts/supr_src/reproduce_base2novel_setting.sh sun397 +bash scripts/supr_src/reproduce_base2novel_setting.sh imagenet \ No newline at end of file diff --git a/scripts/cocoop/base2new_test.sh b/scripts/cocoop/base2new_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..4e545ac9913e69e02136c893c592d933ab3c7b34 --- /dev/null +++ b/scripts/cocoop/base2new_test.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +#cd ../.. + +# custom config +DATA="/path/to/dataset/folder" +TRAINER=CoCoOp + +DATASET=$1 +SEED=$2 + +CFG=vit_b16_c4_ep10_batch1_ctxv1 +SHOTS=16 +LOADEP=10 +SUB=new + + +COMMON_DIR=${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} +MODEL_DIR=output/base2new/train_base/${COMMON_DIR} +DIR=output/base2new/test_${SUB}/${COMMON_DIR} +if [ -d "$DIR" ]; then + echo "Evaluating model" + echo "Results are available in ${DIR}. Resuming..." + + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${LOADEP} \ + --eval-only \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES ${SUB} + +else + echo "Evaluating model" + echo "Runing the first phase job and save the output to ${DIR}" + + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${LOADEP} \ + --eval-only \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES ${SUB} +fi \ No newline at end of file diff --git a/scripts/cocoop/base2new_train.sh b/scripts/cocoop/base2new_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..6acca5d42ed7c6e5b1939c6209ccf8b4d7c23349 --- /dev/null +++ b/scripts/cocoop/base2new_train.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +#cd ../.. + +# custom config +# DATA="/path/to/dataset/folder" +DATA='/ossfs/workspace/nas1/209290/zhengli_dataset' + +TRAINER=CoCoOp + +DATASET=$1 +SEED=$2 + +CFG=vit_b16_c4_ep10_batch1_ctxv1 +SHOTS=16 + + +DIR=output/base2new/train_base/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} +if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}. Resuming..." 
+ python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base +else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base +fi \ No newline at end of file diff --git a/scripts/cocoop/xd_test.sh b/scripts/cocoop/xd_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..47df031cc2d7c27fc91fe6318c51f9ada7a6837c --- /dev/null +++ b/scripts/cocoop/xd_test.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +#cd ../.. + +# custom config +DATA="/path/to/dataset/folder" + +TRAINER=CoCoOp + +DATASET=$1 +SEED=$2 + +CFG=vit_b16_c4_ep10_batch1_ctxv1 +SHOTS=16 + + +DIR=output/evaluation/${TRAINER}/${CFG}_${SHOTS}shots/${DATASET}/seed${SEED} +if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}. Skip this job" +else + echo "Run this job and save the output to ${DIR}" + + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir output/imagenet/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} \ + --load-epoch 10 \ + --eval-only +fi \ No newline at end of file diff --git a/scripts/cocoop/xd_train.sh b/scripts/cocoop/xd_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..40ecc63cbf6cc41b624b1ac0caae870548bbc00f --- /dev/null +++ b/scripts/cocoop/xd_train.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +#cd ../.. + +# custom config +# DATA="/path/to/dataset/folder" +DATA='/ossfs/workspace/nas1/209290/zhengli_dataset' + +TRAINER=CoCoOp + +DATASET=imagenet +SEED=$1 + +CFG=vit_b16_c4_ep10_batch1_ctxv1 +SHOTS=16 + + +DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} +if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}. Skip this job" +else + echo "Run this job and save the output to ${DIR}" + + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + DATASET.NUM_SHOTS ${SHOTS} +fi \ No newline at end of file diff --git a/scripts/coop/base2new.sh b/scripts/coop/base2new.sh new file mode 100644 index 0000000000000000000000000000000000000000..441ab695d1a9639ff841ab3c1fe25e50ff9abfbb --- /dev/null +++ b/scripts/coop/base2new.sh @@ -0,0 +1,55 @@ + + +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=CoOp + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' + +CFG=vit_b16_c4_ep10_batch1_ctxv1 +SHOTS=16 +EP=10 +CTX_INIT="" + +for SEED in 1 2 3 +do + TRAIL_NAME=${CFG} + COMMON_DIR=${DATASET}/shots_${SHOTS}/seed${SEED} + MODEL_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/train_base/${COMMON_DIR} + DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/test_new/${COMMON_DIR} + + if [ -d "$MODEL_DIR" ]; then + echo "Oops! 
The results exist at ${DIR} (so skip this job)" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/CoCoOp/${CFG}.yaml \ + --output-dir ${MODEL_DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base + + + fi + if [ -d "$DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Evaluating model" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/CoCoOp/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES new + + fi +done diff --git a/scripts/coop/eval.sh b/scripts/coop/eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..d1ad12e914c48372766de5ac1ec6f50732d5dafe --- /dev/null +++ b/scripts/coop/eval.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +#cd ../.. + +# custom config +DATA=/path/to/datasets +TRAINER=CoOp +SHOTS=16 +NCTX=16 +CSC=False +CTP=end + +DATASET=$1 +CFG=$2 + +for SEED in 1 2 3 +do + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir output/evaluation/${TRAINER}/${CFG}_${SHOTS}shots/nctx${NCTX}_csc${CSC}_ctp${CTP}/${DATASET}/seed${SEED} \ + --model-dir output/imagenet/${TRAINER}/${CFG}_${SHOTS}shots/nctx${NCTX}_csc${CSC}_ctp${CTP}/seed${SEED} \ + --load-epoch 50 \ + --eval-only \ + TRAINER.COOP.N_CTX ${NCTX} \ + TRAINER.COOP.CSC ${CSC} \ + TRAINER.COOP.CLASS_TOKEN_POSITION ${CTP} +done \ No newline at end of file diff --git a/scripts/coop/main.sh b/scripts/coop/main.sh new file mode 100644 index 0000000000000000000000000000000000000000..a62a02602e1507d9061c8db077ef48647e1ac3ec --- /dev/null +++ b/scripts/coop/main.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +#cd ../.. + +# custom config +DATA=/path/to/datasets +TRAINER=CoOp + +DATASET=$1 +CFG=$2 # config file +CTP=$3 # class token position (end or middle) +NCTX=$4 # number of context tokens +SHOTS=$5 # number of shots (1, 2, 4, 8, 16) +CSC=$6 # class-specific context (False or True) + +for SEED in 1 2 3 +do + DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/nctx${NCTX}_csc${CSC}_ctp${CTP}/seed${SEED} + if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}. 
Skip this job" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + TRAINER.COOP.N_CTX ${NCTX} \ + TRAINER.COOP.CSC ${CSC} \ + TRAINER.COOP.CLASS_TOKEN_POSITION ${CTP} \ + DATASET.NUM_SHOTS ${SHOTS} + fi +done \ No newline at end of file diff --git a/scripts/dept/base2new.sh b/scripts/dept/base2new.sh new file mode 100644 index 0000000000000000000000000000000000000000..917e435e1359518392d1bca306f0783039917bab --- /dev/null +++ b/scripts/dept/base2new.sh @@ -0,0 +1,57 @@ + + + +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=ExtrasLinearProbePromptSRC + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' + +# SUBDIM=9 +CFG=vit_b16_c2_ep20_batch4_4+4ctx +SHOTS=16 +EP=20 + +for SEED in 1 2 3 +do + TRAIL_NAME=${CFG} + COMMON_DIR=${DATASET}/shots_${SHOTS}/seed${SEED} + MODEL_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/train_base/${COMMON_DIR} + DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/test_new/${COMMON_DIR} + + if [ -d "$MODEL_DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/PromptSRC/${CFG}.yaml \ + --output-dir ${MODEL_DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base + + + fi + if [ -d "$DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Evaluating model" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/PromptSRC/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES new + + + fi +done diff --git a/scripts/independent-vlp/base2new.sh b/scripts/independent-vlp/base2new.sh new file mode 100644 index 0000000000000000000000000000000000000000..24c05e673caff8df673760ca865edb3b562ee332 --- /dev/null +++ b/scripts/independent-vlp/base2new.sh @@ -0,0 +1,55 @@ + + +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=IVLP + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' + +# SUBDIM=9 +CFG=vit_b16_c2_ep20_batch8_4+4ctx +SHOTS=16 +EP=20 +for SEED in 1 2 3 +do + + TRAIL_NAME=${CFG} + COMMON_DIR=${DATASET}/shots_${SHOTS}/seed${SEED} + MODEL_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/train_base/${COMMON_DIR} + DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/test_new/${COMMON_DIR} + + if [ -d "$MODEL_DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${MODEL_DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base + + fi + if [ -d "$DIR" ]; then + echo "Oops! 
The results exist at ${DIR} (so skip this job)" + else + echo "Evaluating model" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES new + + fi +done + \ No newline at end of file diff --git a/scripts/kgcoop/base2new.sh b/scripts/kgcoop/base2new.sh new file mode 100644 index 0000000000000000000000000000000000000000..275df568bf21239f5e9e319a28252ee321235359 --- /dev/null +++ b/scripts/kgcoop/base2new.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +#cd ../.. + +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=KgCoOp + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' + +# SUBDIM=9 +CFG=vit_b16_ep100_ctxv1 +SHOTS=16 +NCTX=4 # number of context tokens +EP=100 +CTX_INIT="" + +for SEED in 1 2 3 +do + + TRAIL_NAME=${CFG} + COMMON_DIR=${DATASET}/shots_${SHOTS}/seed${SEED} + MODEL_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/train_base/${COMMON_DIR} + DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/test_new/${COMMON_DIR} + + if [ -d "$MODEL_DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/KgCoOp/${CFG}.yaml \ + --output-dir ${MODEL_DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + TRAINER.COOP.N_CTX ${NCTX} \ + DATASET.SUBSAMPLE_CLASSES base + + fi + if [ -d "$DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Evaluating model" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/KgCoOp/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + DATASET.NUM_SHOTS ${SHOTS} \ + TRAINER.COOP.N_CTX ${NCTX} \ + DATASET.SUBSAMPLE_CLASSES new + + + fi +done diff --git a/scripts/maple/base2new_test_maple.sh b/scripts/maple/base2new_test_maple.sh new file mode 100644 index 0000000000000000000000000000000000000000..0446cd65fd25d84915e647821ace5a68e1fb658f --- /dev/null +++ b/scripts/maple/base2new_test_maple.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +#cd ../.. + +# custom config +# DATA="/path/to/dataset/folder" +DATA='/ossfs/workspace/nas1/209290/zhengli_dataset' + +TRAINER=MaPLe + +DATASET=$1 +SEED=$2 + +CFG=vit_b16_c2_ep5_batch4_2ctx +SHOTS=16 +LOADEP=5 +SUB=new + + +COMMON_DIR=${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} +MODEL_DIR=output/base2new/train_base/${COMMON_DIR} +DIR=output/base2new/test_${SUB}/${COMMON_DIR} +if [ -d "$DIR" ]; then + echo "Evaluating model" + echo "Results are available in ${DIR}. Resuming..." 
+ + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${LOADEP} \ + --eval-only \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES ${SUB} + +else + echo "Evaluating model" + echo "Runing the first phase job and save the output to ${DIR}" + + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${LOADEP} \ + --eval-only \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES ${SUB} +fi \ No newline at end of file diff --git a/scripts/maple/base2new_train_maple.sh b/scripts/maple/base2new_train_maple.sh new file mode 100644 index 0000000000000000000000000000000000000000..72eb9d5727b5794804254321c46e706ede2e2511 --- /dev/null +++ b/scripts/maple/base2new_train_maple.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +#cd ../.. + +# custom config +# DATA="/path/to/dataset/folder" +DATA='/ossfs/workspace/nas1/209290/zhengli_dataset' + +TRAINER=MaPLe + +DATASET=$1 +SEED=$2 + +CFG=vit_b16_c2_ep5_batch4_2ctx +SHOTS=16 + + +DIR=output/base2new/train_base/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} +if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}. Resuming..." + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base +else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base +fi \ No newline at end of file diff --git a/scripts/maple/xd_test_maple.sh b/scripts/maple/xd_test_maple.sh new file mode 100644 index 0000000000000000000000000000000000000000..11828ec035305048f48be061c34b42d5dd1c3ef1 --- /dev/null +++ b/scripts/maple/xd_test_maple.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +#cd ../.. + +# custom config +DATA="/path/to/dataset/folder" +TRAINER=MaPLe + +DATASET=$1 +SEED=$2 + +CFG=vit_b16_c2_ep5_batch4_2ctx_cross_datasets +SHOTS=16 + + +DIR=output/evaluation/${TRAINER}/${CFG}_${SHOTS}shots/${DATASET}/seed${SEED} +if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}. Skip this job" +else + echo "Run this job and save the output to ${DIR}" + + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir output/imagenet/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} \ + --load-epoch 2 \ + --eval-only +fi \ No newline at end of file diff --git a/scripts/maple/xd_train_maple.sh b/scripts/maple/xd_train_maple.sh new file mode 100644 index 0000000000000000000000000000000000000000..2829c513f222665e09ae367c5e63cc4215df9633 --- /dev/null +++ b/scripts/maple/xd_train_maple.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +#cd ../.. 
+ +# custom config +DATA="/path/to/dataset/folder" +TRAINER=MaPLe + +DATASET=$1 +SEED=$2 + +CFG=vit_b16_c2_ep5_batch4_2ctx_cross_datasets +SHOTS=16 + + +DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} +if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}." +else + echo "Run this job and save the output to ${DIR}" + + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + DATASET.NUM_SHOTS ${SHOTS} +fi \ No newline at end of file diff --git a/scripts/promptsrc/base2new.sh b/scripts/promptsrc/base2new.sh new file mode 100644 index 0000000000000000000000000000000000000000..e7991885723ec734173df98742cdcfb6b8fa80ac --- /dev/null +++ b/scripts/promptsrc/base2new.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +#cd ../.. + +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=PromptSRC + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' + +# SUBDIM=9 +CFG=vit_b16_c2_ep20_batch4_4+4ctx +SHOTS=16 +EP=20 + +for SEED in 1 2 3 +do + TRAIL_NAME=${CFG} + COMMON_DIR=${DATASET}/shots_${SHOTS}/seed${SEED} + MODEL_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/train_base/${COMMON_DIR} + DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/test_new/${COMMON_DIR} + + if [ -d "$MODEL_DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/PromptSRC/${CFG}.yaml \ + --output-dir ${MODEL_DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base + + + fi + if [ -d "$DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Evaluating model" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/PromptSRC/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES new + + + fi +done diff --git a/scripts/promptsrc/base2new_test.sh b/scripts/promptsrc/base2new_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..0a546579814ed5f3f91e22ff1378b5c1a5871ce8 --- /dev/null +++ b/scripts/promptsrc/base2new_test.sh @@ -0,0 +1,53 @@ +#!/bin/bash + + +# custom config +DATA="/path/to/dataset/folder" +TRAINER=PromptSRC + +DATASET=$1 +SEED=$2 + +CFG=vit_b16_c2_ep20_batch4_4+4ctx +SHOTS=16 +LOADEP=20 +SUB=new + + +COMMON_DIR=${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} +MODEL_DIR=output/base2new/train_base/${COMMON_DIR} +DIR=output/base2new/test_${SUB}/${COMMON_DIR} +if [ -d "$DIR" ]; then + echo "Evaluating model" + echo "Results are available in ${DIR}. Resuming..." 
+ + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${LOADEP} \ + --eval-only \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES ${SUB} + +else + echo "Evaluating model" + echo "Runing the first phase job and save the output to ${DIR}" + + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${LOADEP} \ + --eval-only \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES ${SUB} +fi \ No newline at end of file diff --git a/scripts/promptsrc/base2new_train.sh b/scripts/promptsrc/base2new_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..47b6b16e2796482130a461b5f07799b415a8a0ea --- /dev/null +++ b/scripts/promptsrc/base2new_train.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# custom config +DATA="/path/to/dataset/folder" + +TRAINER=PromptSRC + +DATASET=$1 +SEED=$2 + +CFG=vit_b16_c2_ep20_batch4_4+4ctx +SHOTS=16 + + +DIR=output/base2new/train_base/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED} + +if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}. Resuming..." + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base +else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base +fi \ No newline at end of file diff --git a/scripts/promptsrc/few_shot.sh b/scripts/promptsrc/few_shot.sh new file mode 100644 index 0000000000000000000000000000000000000000..46afb094609668fc76f20966f274477bc3f98423 --- /dev/null +++ b/scripts/promptsrc/few_shot.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# custom config +DATA="/path/to/dataset/folder" +TRAINER=PromptSRC + +DATASET=$1 +CFG=vit_b16_c2_ep50_batch4_4+4ctx_few_shot +SHOTS=$2 + +for SEED in 1 2 3 +do + DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} + if [ -d "$DIR" ]; then + echo " The results exist at ${DIR}" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + DATASET.NUM_SHOTS ${SHOTS} + fi +done \ No newline at end of file diff --git a/scripts/promptsrc/xd_test.sh b/scripts/promptsrc/xd_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..4be1f7038db6ba67b1e468d5e1433b1a660e86de --- /dev/null +++ b/scripts/promptsrc/xd_test.sh @@ -0,0 +1,31 @@ +#!/bin/bash + + +# custom config +DATA="/path/to/dataset/folder" +TRAINER=PromptSRC + +DATASET=$1 +SEED=$2 + +CFG=vit_b16_c2_ep5_batch4_4+4ctx_cross_datasets +SHOTS=16 + + +DIR=output/evaluation/${TRAINER}/${CFG}_${SHOTS}shots/${DATASET}/seed${SEED} +if [ -d "$DIR" ]; 
then + echo "Results are available in ${DIR}. Skip this job" +else + echo "Run this job and save the output to ${DIR}" + + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir output/imagenet/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} \ + --load-epoch 20 \ + --eval-only +fi \ No newline at end of file diff --git a/scripts/promptsrc/xd_train.sh b/scripts/promptsrc/xd_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..0da25e3675337643add4b65941025262af94b578 --- /dev/null +++ b/scripts/promptsrc/xd_train.sh @@ -0,0 +1,29 @@ +#!/bin/bash + + +# custom config +DATA="/path/to/dataset/folder" +TRAINER=PromptSRC + +DATASET=$1 +SEED=$2 + +CFG=vit_b16_c2_ep5_batch4_4+4ctx_cross_datasets +SHOTS=16 + + +DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} +if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}." +else + echo "Run this job and save the output to ${DIR}" + + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + DATASET.NUM_SHOTS ${SHOTS} +fi \ No newline at end of file diff --git a/scripts/supr/base2new.sh b/scripts/supr/base2new.sh new file mode 100644 index 0000000000000000000000000000000000000000..c27495fa076f49d8b6345a15eb2bb212c32ee3f1 --- /dev/null +++ b/scripts/supr/base2new.sh @@ -0,0 +1,50 @@ + + +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=SuPr + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' + +SHOTS=16 +EP=10 +CFG=vit_b16_ep10_batch4_4+4ctx + +for SEED in 1 2 3 +do + TRAIL_NAME=${CFG} + COMMON_DIR=${DATASET}/shots_${SHOTS}/seed${SEED} + MODEL_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/train_base/${COMMON_DIR} + DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/test_new/${COMMON_DIR} + + if [ -d "$MODEL_DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/SuPr/${CFG}.yaml \ + --output-dir ${MODEL_DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base + fi + if [ -d "$DIR" ]; then + echo "Oops! 
The results exist at ${DIR} (so skip this job)" + else + echo "Evaluating model" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/SuPr/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + DATASET.SUBSAMPLE_CLASSES new + fi +done diff --git a/scripts/supr/cross_dg.sh b/scripts/supr/cross_dg.sh new file mode 100644 index 0000000000000000000000000000000000000000..9c902c346e432837ddae8805d470540a4f534339 --- /dev/null +++ b/scripts/supr/cross_dg.sh @@ -0,0 +1,74 @@ + + +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=SuPr + + +SHOTS=16 +CFG=vit_b16_ep12_batch8_4+4ctx_cross_datasets + +for SEED in 1 2 3 +do + TRAIL_NAME=${CFG} + MODEL_DIR=output/cross_dg/${TRAINER}/${TRAIL_NAME}/imagenet/shots_${SHOTS}/seed${SEED} + + if [ -d "$MODEL_DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/imagenet.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${MODEL_DIR} \ + DATASET.NUM_SHOTS ${SHOTS} + fi + + # cross + for DATASET in caltech101 dtd eurosat fgvc_aircraft oxford_flowers food101 oxford_pets stanford_cars sun397 ucf101 + do + DIR=output/cross_dg/${TRAINER}/${TRAIL_NAME}/${DATASET}/shots_${SHOTS}/seed${SEED} + + if [ -d "$DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Cross-dataset Evaluating" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch 4 \ + --eval-only + fi + done + + # dg + for DATASET in imagenetv2 imagenet_sketch imagenet_a imagenet_r + do + DIR=output/cross_dg/${TRAINER}/${TRAIL_NAME}/${DATASET}/shots_${SHOTS}/seed${SEED} + + if [ -d "$DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Domain Generlization Evaluating" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch 4 \ + --eval-only + fi + done + +done diff --git a/scripts/supr/few_shot.sh b/scripts/supr/few_shot.sh new file mode 100644 index 0000000000000000000000000000000000000000..4ce392e1d6d018a18cf98b664ac68055f8730f6c --- /dev/null +++ b/scripts/supr/few_shot.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +#cd ../.. + +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=SuPr + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' + + +SHOTS=4 +CFG=vit_b16_ep25_batch8_4+4ctx_few_shot +for SEED in 1 2 3 +do + # COMMON_DIR=${DATASET}/shots_${SHOTS}/${TRAINER}/1119_s${SUBDIM}_${CFG}/seed${SEED} + TRAIL_NAME=${CFG} + DIR=output/fewshot/${TRAINER}/${TRAIL_NAME}/${DATASET}/shots_${SHOTS}/seed${SEED} + + if [ -d "$DIR" ]; then + echo "Oops! 
The results exist at ${DIR} (so skip this job)" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + DATASET.NUM_SHOTS ${SHOTS} + fi +done \ No newline at end of file diff --git a/scripts/supr/reproduce_base2novel_setting.sh b/scripts/supr/reproduce_base2novel_setting.sh new file mode 100644 index 0000000000000000000000000000000000000000..be43d3569bccad223949543a6b277e54040319a5 --- /dev/null +++ b/scripts/supr/reproduce_base2novel_setting.sh @@ -0,0 +1,53 @@ +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=SuPr +WEIGHTSPATH=weights + + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' +EP=10 +CFG=vit_b16_ep10_batch4_4+4ctx +SHOTS=16 + +for SEED in 1 2 3 +do + TRAIL_NAME=reproduce_${CFG} + COMMON_DIR=${DATASET}/shots_${SHOTS}/seed${SEED} + MODEL_DIR=${WEIGHTSPATH}/${TRAINER}/base2new/${COMMON_DIR} + BASE_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/train_base/${COMMON_DIR} + NEW_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/test_new/${COMMON_DIR} + + if [ -d "$BASE_DIR" ]; then + echo " The results exist at ${BASE_DIR}" + else + echo "Run this job and save the output to ${BASE_DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/SuPr/${CFG}.yaml \ + --output-dir ${BASE_DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + DATASET.SUBSAMPLE_CLASSES base + fi + + if [ -d "$NEW_DIR" ]; then + echo " The results exist at ${NEW_DIR}" + else + echo "Run this job and save the output to ${NEW_DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/SuPr/${CFG}.yaml \ + --output-dir ${NEW_DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + DATASET.SUBSAMPLE_CLASSES new + fi +done diff --git a/scripts/supr/reproduce_fewshot.sh b/scripts/supr/reproduce_fewshot.sh new file mode 100644 index 0000000000000000000000000000000000000000..115bfa52a25709d2ad792adca2fde125e8d10626 --- /dev/null +++ b/scripts/supr/reproduce_fewshot.sh @@ -0,0 +1,30 @@ +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=SuPr +WEIGHTSPATH=weights + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' +CFG=vit_b16_ep25_batch8_4+4ctx_few_shot +for SEED in 1 2 3 +do + TRAIL_NAME=reproduce_${CFG} + COMMON_DIR=${DATASET}/shots_4/seed${SEED} + MODEL_DIR=${WEIGHTSPATH}/${TRAINER}/fewshot/${COMMON_DIR} + DIR=output/fewshot/${TRAINER}/${TRAIL_NAME}/${COMMON_DIR} + + if [ -d "$DIR" ]; then + echo " The results exist at ${DIR}" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --eval-only + fi +done + diff --git a/scripts/supr/reproduce_xd.sh b/scripts/supr/reproduce_xd.sh new file mode 100644 index 
0000000000000000000000000000000000000000..b8f2ddcef3278035ccfc443ceaedfacf0b8cbb02 --- /dev/null +++ b/scripts/supr/reproduce_xd.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=SuPr +WEIGHTSPATH=weights + +DATASET=$1 +CFG=vit_b16_ep12_batch8_4+4ctx_cross_datasets + + +for SEED in 1 2 3 +do + TRAIL_NAME=reproduce_${CFG} + MODEL_DIR=${WEIGHTSPATH}/${TRAINER}/cross_dg/imagenet/shots_16/seed${SEED} + DIR=output/cross_dg/${TRAINER}/${TRAIL_NAME}/${DATASET}/shots_16/seed${SEED} + if [ -d "$DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Evaluating" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch 4 \ + --eval-only + fi +done + + diff --git a/scripts/supr_ens/base2new.sh b/scripts/supr_ens/base2new.sh new file mode 100644 index 0000000000000000000000000000000000000000..3a9dd69b9812509c3a0e4af298cb99a6aca4c40a --- /dev/null +++ b/scripts/supr_ens/base2new.sh @@ -0,0 +1,54 @@ + + +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=SuPrEns + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' + +SHOTS=16 +EP=10 +CFG=vit_b16_ep10_batch4_4+4ctx + +for SEED in 1 2 3 +do + TRAIL_NAME=${CFG} + COMMON_DIR=${DATASET}/shots_${SHOTS}/seed${SEED} + MODEL_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/train_base/${COMMON_DIR} + DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/test_new/${COMMON_DIR} + + if [ -d "$MODEL_DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/SuPr/${CFG}.yaml \ + --output-dir ${MODEL_DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + TRAINER.SUPR.SVD False \ + DATASET.SUBSAMPLE_CLASSES base + # use least square save gpu memory(mathematical equivalent) + fi + if [ -d "$DIR" ]; then + echo "Oops! 
The results exist at ${DIR} (so skip this job)" + else + echo "Evaluating model" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/SuPr/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + TRAINER.SUPR.SVD False \ + DATASET.SUBSAMPLE_CLASSES new + # use least square save gpu memory(mathematical equivalent) + fi +done diff --git a/scripts/supr_ens/reproduce_base2novel_setting.sh b/scripts/supr_ens/reproduce_base2novel_setting.sh new file mode 100644 index 0000000000000000000000000000000000000000..2757227ef6e088ea1fb4c0a1b8ae9ce44ea89e91 --- /dev/null +++ b/scripts/supr_ens/reproduce_base2novel_setting.sh @@ -0,0 +1,55 @@ +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=SuPrEns +WEIGHTSPATH=weights + + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' +EP=10 +CFG=vit_b16_ep10_batch4_4+4ctx +SHOTS=16 + +for SEED in 1 2 3 +do + TRAIL_NAME=reproduce_${CFG} + COMMON_DIR=${DATASET}/shots_${SHOTS}/seed${SEED} + MODEL_DIR=${WEIGHTSPATH}/${TRAINER}/${COMMON_DIR} + BASE_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/train_base/${COMMON_DIR} + NEW_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/test_new/${COMMON_DIR} + + + if [ -d "$BASE_DIR" ]; then + echo " The results exist at ${BASE_DIR}" + else + echo "Run this job and save the output to ${BASE_DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/SuPr/${CFG}.yaml \ + --output-dir ${BASE_DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + TRAINER.SUPR.SVD False \ + DATASET.SUBSAMPLE_CLASSES base + fi + if [ -d "$NEW_DIR" ]; then + echo " The results exist at ${NEW_DIR}" + else + echo "Run this job and save the output to ${NEW_DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/SuPr/${CFG}.yaml \ + --output-dir ${NEW_DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + TRAINER.SUPR.SVD False \ + DATASET.SUBSAMPLE_CLASSES new + fi +done diff --git a/scripts/supr_src/base2new.sh b/scripts/supr_src/base2new.sh new file mode 100644 index 0000000000000000000000000000000000000000..de45daa51f094506bd387519d6d405ae19b4e2cb --- /dev/null +++ b/scripts/supr_src/base2new.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +#cd ../.. + +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=SubspacePromptSRC + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' + + +CFG=vit_b16_ep20_batch4_4+4ctx_promptsrc +SHOTS=16 +EP=20 + + +for SEED in 1 2 3 +do + TRAIL_NAME=${CFG} + COMMON_DIR=${DATASET}/shots_${SHOTS}/seed${SEED} + MODEL_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/train_base/${COMMON_DIR} + DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/test_new/${COMMON_DIR} + + if [ -d "$MODEL_DIR" ]; then + echo "Oops! 
The results exist at ${MODEL_DIR} (so skip this job)" + else + echo "Run this job and save the output to ${MODEL_DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/SuPr/${CFG}.yaml \ + --output-dir ${MODEL_DIR} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base + fi + if [ -d "$DIR" ]; then + echo "Oops! The results exist at ${DIR} (so skip this job)" + else + echo "Evaluating model" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/SuPr/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + DATASET.SUBSAMPLE_CLASSES new + + fi +done + \ No newline at end of file diff --git a/scripts/supr_src/reproduce_base2novel_setting.sh b/scripts/supr_src/reproduce_base2novel_setting.sh new file mode 100644 index 0000000000000000000000000000000000000000..a2ed67f8fe75c3077dd517bd5d9da8d9715b1c1f --- /dev/null +++ b/scripts/supr_src/reproduce_base2novel_setting.sh @@ -0,0 +1,54 @@ +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=SubspacePromptSRC +WEIGHTSPATH=weights + + +DATASET=$1 # 'imagenet' 'caltech101' 'dtd' 'eurosat' 'fgvc_aircraft' 'oxford_flowers' 'food101' 'oxford_pets' 'stanford_cars' 'sun397' 'ucf101' + +EP=20 +CFG=vit_b16_ep20_batch4_4+4ctx_promptsrc +SHOTS=16 + +for SEED in 1 2 3 +do + TRAIL_NAME=reproduce_${CFG} + COMMON_DIR=${DATASET}/shots_${SHOTS}/seed${SEED} + MODEL_DIR=${WEIGHTSPATH}/${TRAINER}/${COMMON_DIR} + BASE_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/train_base/${COMMON_DIR} + NEW_DIR=output/base2new/${TRAINER}/${TRAIL_NAME}/test_new/${COMMON_DIR} + + if [ -d "$NEW_DIR" ]; then + echo " The results exist at ${NEW_DIR}" + else + echo "Run this job and save the output to ${NEW_DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/SuPr/${CFG}.yaml \ + --output-dir ${BASE_DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + DATASET.SUBSAMPLE_CLASSES base + fi + + if [ -d "$NEW_DIR" ]; then + echo " The results exist at ${NEW_DIR}" + else + echo "Run this job and save the output to ${NEW_DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/SuPr/${CFG}.yaml \ + --output-dir ${NEW_DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${EP} \ + --eval-only \ + DATASET.SUBSAMPLE_CLASSES new + fi +done diff --git a/scripts/tcp/base2new_train.sh b/scripts/tcp/base2new_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..66439373d5ab1f46be5442473ee77c66a0949dac --- /dev/null +++ b/scripts/tcp/base2new_train.sh @@ -0,0 +1,132 @@ +#!/bin/bash + +# cd .. 
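+# Note: this script runs both phases of the TCP base-to-new protocol. For each dataset it first
+# trains on the base split (DATASET.SUBSAMPLE_CLASSES base) for seeds 1-3, then reloads the saved
+# weights with --eval-only and tests on the new split. The checkpoint epoch used for evaluation
+# differs by dataset group: LOADEP=50 for the first dataset list and LOADEP=25 for sun397/imagenet.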
+ +# custom config +DATA=/mnt/sdb/data/datasets +TRAINER=TCP +WEIGHT=1.0 + +CFG=vit_b16_ep100_ctxv1 +CTP=end # class token position (end or middle) +NCTX=4 # number of context tokens +SHOTS=16 # number of shots (1, 2, 4, 8, 16) +CSC=False # class-specific context (False or True) +FOLDER=output_1108 + + +for DATASET in caltech101 dtd eurosat fgvc_aircraft food101 oxford_flowers oxford_pets stanford_cars ucf101 +do + for SEED in 1 2 3 + do + DIR=${FOLDER}_${NCTX}/base2new/train_base/${DATASET}/shots_${SHOTS}_${WEIGHT}/${TRAINER}/${CFG}/seed${SEED} + if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}. Skip this job" + else + echo "Run this job and save the output to ${DIR}" + CUDA_VISIBLE_DEVICES=0 python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + TRAINER.TCP.N_CTX ${NCTX} \ + TRAINER.TCP.CSC ${CSC} \ + TRAINER.TCP.W ${WEIGHT} \ + TRAINER.TCP.CLASS_TOKEN_POSITION ${CTP} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base + fi + done + + + LOADEP=50 + SUB=new + for SEED in 1 2 3 + do + COMMON_DIR=${DATASET}/shots_${SHOTS}_${WEIGHT}/${TRAINER}/${CFG}/seed${SEED} + MODEL_DIR=${FOLDER}_${NCTX}/base2new/train_base/${COMMON_DIR} + DIR=${FOLDER}_${NCTX}_eval/base2new/test_${SUB}/${COMMON_DIR} + + if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}. Skip this job" + else + echo "Run this job and save the output to ${DIR}" + CUDA_VISIBLE_DEVICES=0 python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${LOADEP} \ + --eval-only \ + TRAINER.TCP.N_CTX ${NCTX} \ + TRAINER.TCP.CSC ${CSC} \ + TRAINER.TCP.CLASS_TOKEN_POSITION ${CTP} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES ${SUB} + fi + done +done + + +for DATASET in sun397 imagenet +do +for SEED in 1 2 3 +do + DIR=${FOLDER}_${NCTX}/base2new/train_base/${DATASET}/shots_${SHOTS}_${WEIGHT}/${TRAINER}/${CFG}/seed${SEED} + if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}. Skip this job" + else + echo "Run this job and save the output to ${DIR}" + CUDA_VISIBLE_DEVICES=0 python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + TRAINER.TCP.N_CTX ${NCTX} \ + TRAINER.TCP.CSC ${CSC} \ + TRAINER.TCP.W ${WEIGHT} \ + TRAINER.TCP.CLASS_TOKEN_POSITION ${CTP} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base + fi +done + + +LOADEP=25 +SUB=new +for SEED in 1 2 3 +do + COMMON_DIR=${DATASET}/shots_${SHOTS}_${WEIGHT}/${TRAINER}/${CFG}/seed${SEED} + MODEL_DIR=${FOLDER}_${NCTX}/base2new/train_base/${COMMON_DIR} + DIR=${FOLDER}_${NCTX}/base2new/test_${SUB}/${COMMON_DIR} + + if [ -d "$DIR" ]; then + echo "Results are available in ${DIR}. 
Skip this job" + else + echo "Run this job and save the output to ${DIR}" + CUDA_VISIBLE_DEVICES=0 python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + --model-dir ${MODEL_DIR} \ + --load-epoch ${LOADEP} \ + --eval-only \ + TRAINER.TCP.N_CTX ${NCTX} \ + TRAINER.TCP.CSC ${CSC} \ + TRAINER.TCP.CLASS_TOKEN_POSITION ${CTP} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES ${SUB} + fi +done +done + diff --git a/scripts/zsclip/zeroshot.sh b/scripts/zsclip/zeroshot.sh new file mode 100644 index 0000000000000000000000000000000000000000..6881739ba0aae56131da0019a1d28ea64382e268 --- /dev/null +++ b/scripts/zsclip/zeroshot.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +#cd ../.. + +# custom config +DATA=/path/to/datasets +TRAINER=ZeroshotCLIP +DATASET=$1 +CFG=$2 # rn50, rn101, vit_b32 or vit_b16 + +python train.py \ +--root ${DATA} \ +--trainer ${TRAINER} \ +--dataset-config-file configs/datasets/${DATASET}.yaml \ +--config-file configs/trainers/CoOp/${CFG}.yaml \ +--output-dir output/${TRAINER}/${CFG}/${DATASET} \ +--eval-only \ No newline at end of file diff --git a/train.py b/train.py new file mode 100644 index 0000000000000000000000000000000000000000..1572fcc2171a3a8772a7658b42d04cf1f402a698 --- /dev/null +++ b/train.py @@ -0,0 +1,290 @@ +import argparse +import torch + +from dassl.utils import setup_logger, set_random_seed, collect_env_info +from dassl.config import get_cfg_default +from dassl.engine import build_trainer + +# custom +import datasets.oxford_pets +import datasets.oxford_flowers +import datasets.fgvc_aircraft +import datasets.dtd +import datasets.eurosat +import datasets.stanford_cars +import datasets.food101 +import datasets.sun397 +import datasets.caltech101 +import datasets.ucf101 +import datasets.imagenet + +import datasets.imagenet_sketch +import datasets.imagenetv2 +import datasets.imagenet_a +import datasets.imagenet_r + +import trainers.coop +import trainers.cocoop +import trainers.kgcoop +import trainers.zsclip +import trainers.maple +import trainers.independentVL +import trainers.promptsrc +import trainers.tcp +import trainers.supr +import trainers.supr_ens +import trainers.elp_promptsrc +import trainers.supr_promptsrc + + +def print_args(args, cfg): + print("***************") + print("** Arguments **") + print("***************") + optkeys = list(args.__dict__.keys()) + optkeys.sort() + for key in optkeys: + print("{}: {}".format(key, args.__dict__[key])) + print("************") + print("** Config **") + print("************") + print(cfg) + + +def reset_cfg(cfg, args): + if args.root: + cfg.DATASET.ROOT = args.root + + if args.output_dir: + cfg.OUTPUT_DIR = args.output_dir + + if args.resume: + cfg.RESUME = args.resume + + if args.seed: + cfg.SEED = args.seed + + if args.source_domains: + cfg.DATASET.SOURCE_DOMAINS = args.source_domains + + if args.target_domains: + cfg.DATASET.TARGET_DOMAINS = args.target_domains + + if args.transforms: + cfg.INPUT.TRANSFORMS = args.transforms + + if args.trainer: + cfg.TRAINER.NAME = args.trainer + + if args.backbone: + cfg.MODEL.BACKBONE.NAME = args.backbone + + if args.head: + cfg.MODEL.HEAD.NAME = args.head + + +def extend_cfg(cfg): + """ + Add new config variables. + + E.g. + from yacs.config import CfgNode as CN + cfg.TRAINER.MY_MODEL = CN() + cfg.TRAINER.MY_MODEL.PARAM_A = 1. 
+ cfg.TRAINER.MY_MODEL.PARAM_B = 0.5 + cfg.TRAINER.MY_MODEL.PARAM_C = False + """ + from yacs.config import CfgNode as CN + + cfg.TRAINER.COOP = CN() + cfg.TRAINER.COOP.N_CTX = 16 # number of context vectors + cfg.TRAINER.COOP.CSC = False # class-specific context + cfg.TRAINER.COOP.CTX_INIT = "" # initialization words + cfg.TRAINER.COOP.PREC = "fp16" # fp16, fp32, amp + cfg.TRAINER.COOP.W = 8.0 # loss weight (used by KgCoOp) + cfg.TRAINER.COOP.CLASS_TOKEN_POSITION = "end" # 'middle' or 'end' or 'front' + + cfg.TRAINER.COCOOP = CN() + cfg.TRAINER.COCOOP.N_CTX = 16 # number of context vectors + cfg.TRAINER.COCOOP.CTX_INIT = "" # initialization words + cfg.TRAINER.COCOOP.PREC = "fp16" # fp16, fp32, amp + + # Config for MaPLe + cfg.TRAINER.MAPLE = CN() + cfg.TRAINER.MAPLE.N_CTX = 2 # number of context vectors + cfg.TRAINER.MAPLE.CTX_INIT = "a photo of a" # initialization words + cfg.TRAINER.MAPLE.PREC = "fp16" # fp16, fp32, amp + cfg.TRAINER.MAPLE.PROMPT_DEPTH = 9 # Max 12, minimum 0, for 1 it will act as shallow MaPLe (J=1) + cfg.DATASET.SUBSAMPLE_CLASSES = "all" # all, base or new + + # Config for PromptSRC + cfg.TRAINER.PROMPTSRC = CN() + cfg.TRAINER.PROMPTSRC.N_CTX_VISION = 4 # number of context vectors at the vision branch + cfg.TRAINER.PROMPTSRC.N_CTX_TEXT = 4 # number of context vectors at the language branch + cfg.TRAINER.PROMPTSRC.CTX_INIT = "a photo of a" # initialization words + cfg.TRAINER.PROMPTSRC.PREC = "fp16" # fp16, fp32, amp + cfg.TRAINER.PROMPTSRC.PROMPT_DEPTH_VISION = 9 # Max 12, minimum 0; 0 uses shallow IVLP prompting (J=1) + cfg.TRAINER.PROMPTSRC.PROMPT_DEPTH_TEXT = 9 # Max 12, minimum 0; 0 uses shallow IVLP prompting (J=1) + cfg.TRAINER.PROMPTSRC.TEXT_LOSS_WEIGHT = 25 + cfg.TRAINER.PROMPTSRC.IMAGE_LOSS_WEIGHT = 10 + cfg.TRAINER.PROMPTSRC.GPA_MEAN = 15 + cfg.TRAINER.PROMPTSRC.GPA_STD = 1 + + + # Config for independent Vision-Language prompting (independent-vlp) + cfg.TRAINER.IVLP = CN() + cfg.TRAINER.IVLP.N_CTX_VISION = 2 # number of context vectors at the vision branch + cfg.TRAINER.IVLP.N_CTX_TEXT = 2 # number of context vectors at the language branch + cfg.TRAINER.IVLP.CTX_INIT = "a photo of a" # initialization words (only for language prompts) + cfg.TRAINER.IVLP.PREC = "fp16" # fp16, fp32, amp + # If both depth variables below are set to 0, the config degenerates to the CoOp model + cfg.TRAINER.IVLP.PROMPT_DEPTH_VISION = 9 # Max 12, minimum 0; 0 acts as shallow IVLP prompting (J=1) + cfg.TRAINER.IVLP.PROMPT_DEPTH_TEXT = 9 # Max 12, minimum 0; 0 acts as shallow IVLP prompting (J=1) + cfg.DATASET.SUBSAMPLE_CLASSES = "all" # all, base or new + cfg.TEST.NO_TEST = False + + + # For DePT + # linear classifier settings + cfg.TRAINER.LINEAR_PROBE = CN() + cfg.TRAINER.LINEAR_PROBE.TYPE = 'linear' + cfg.TRAINER.LINEAR_PROBE.WEIGHT = 0.3 + cfg.TRAINER.LINEAR_PROBE.TEST_TIME_FUSION = True + + # cwT module settings + cfg.TRAINER.FILM = CN() + cfg.TRAINER.FILM.LINEAR_PROBE = True + cfg.OPTIM.LR_EXP = 6.5 + cfg.OPTIM.NEW_LAYERS = ['linear_probe', 'film'] + + # For TCP + cfg.TRAINER.TCP = CN() + cfg.TRAINER.TCP.N_CTX = 4 # number of context vectors + cfg.TRAINER.TCP.CSC = False # class-specific context + cfg.TRAINER.TCP.CTX_INIT = "" # initialization words + cfg.TRAINER.TCP.PREC = "fp16" # fp16, fp32, amp + cfg.TRAINER.TCP.W = 1.0 + cfg.TRAINER.TCP.CLASS_TOKEN_POSITION = "end" + + + # For SuPr + cfg.TRAINER.SUPR = CN() + cfg.TRAINER.SUPR.N_CTX_VISION = 4 # number of context vectors at the vision branch + cfg.TRAINER.SUPR.N_CTX_TEXT = 4 # number
of context vectors at the language branch + cfg.TRAINER.SUPR.CTX_INIT = "a photo of a" # initialization words + cfg.TRAINER.SUPR.PREC = "fp16" # fp16, fp32, amp + cfg.TRAINER.SUPR.PROMPT_DEPTH_VISION = 9 # Max 12, minimum 0, for 0 it will be using shallow IVLP prompting (J=1) + cfg.TRAINER.SUPR.PROMPT_DEPTH_TEXT = 9 # Max 12, minimum 0, for 0 it will be using shallow IVLP prompting (J=1) + cfg.TRAINER.SUPR.SPACE_DIM = 7 # Subspace dimension + cfg.TRAINER.SUPR.ENSEMBLE_NUM = 3 # For SuPr Ens + cfg.TRAINER.SUPR.REG_LOSS_WEIGHT = 60 # Regularization loss weight lambda + cfg.TRAINER.SUPR.LAMBDA = 0.7 # Balance coefficients gamma + cfg.TRAINER.SUPR.SVD = True + cfg.TRAINER.SUPR.HARD_PROMPT_PATH = "configs/trainers/SuPr/hard_prompts/" + cfg.TRAINER.SUPR.TRAINER_BACKBONE = "SuPr" + + +def setup_cfg(args): + cfg = get_cfg_default() + extend_cfg(cfg) + + # 1. From the dataset config file + if args.dataset_config_file: + cfg.merge_from_file(args.dataset_config_file) + + # 2. From the method config file + if args.config_file: + cfg.merge_from_file(args.config_file) + + # 3. From input arguments + reset_cfg(cfg, args) + + # 4. From optional input arguments + cfg.merge_from_list(args.opts) + + cfg.freeze() + + return cfg + + +def main(args): + cfg = setup_cfg(args) + if cfg.SEED >= 0: + print("Setting fixed seed: {}".format(cfg.SEED)) + set_random_seed(cfg.SEED) + setup_logger(cfg.OUTPUT_DIR) + + if torch.cuda.is_available() and cfg.USE_CUDA: + torch.backends.cudnn.benchmark = True + + print_args(args, cfg) + print("Collecting env info ...") + print("** System info **\n{}\n".format(collect_env_info())) + + trainer = build_trainer(cfg) + + if args.eval_only: + trainer.load_model(args.model_dir, epoch=args.load_epoch) + trainer.test() + return + + + if not args.no_train: + trainer.train() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--root", type=str, default="", help="path to dataset") + parser.add_argument("--output-dir", type=str, default="", help="output directory") + parser.add_argument( + "--resume", + type=str, + default="", + help="checkpoint directory (from which the training resumes)", + ) + parser.add_argument( + "--seed", type=int, default=-1, help="only positive value enables a fixed seed" + ) + parser.add_argument( + "--source-domains", type=str, nargs="+", help="source domains for DA/DG" + ) + parser.add_argument( + "--target-domains", type=str, nargs="+", help="target domains for DA/DG" + ) + parser.add_argument( + "--transforms", type=str, nargs="+", help="data augmentation methods" + ) + parser.add_argument( + "--config-file", type=str, default="", help="path to config file" + ) + parser.add_argument( + "--dataset-config-file", + type=str, + default="", + help="path to config file for dataset setup", + ) + parser.add_argument("--trainer", type=str, default="", help="name of trainer") + parser.add_argument("--backbone", type=str, default="", help="name of CNN backbone") + parser.add_argument("--head", type=str, default="", help="name of head") + parser.add_argument("--eval-only", action="store_true", help="evaluation only") + parser.add_argument( + "--model-dir", + type=str, + default="", + help="load model from this directory for eval-only mode", + ) + parser.add_argument( + "--load-epoch", type=int, help="load model weights at this epoch for evaluation" + ) + parser.add_argument( + "--no-train", action="store_true", help="do not call trainer.train()" + ) + parser.add_argument( + "opts", + default=None, + nargs=argparse.REMAINDER, + 
help="modify config options using the command-line", + ) + args = parser.parse_args() + main(args) diff --git a/trainers/__init__.py b/trainers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/trainers/__pycache__/__init__.cpython-38.pyc b/trainers/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2d92809c70beebeaaf275db9b2965fefac4c60dd Binary files /dev/null and b/trainers/__pycache__/__init__.cpython-38.pyc differ diff --git a/trainers/__pycache__/cocoop.cpython-38.pyc b/trainers/__pycache__/cocoop.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f6b52e3dedd4dbc807f39af458e9e70759e912ca Binary files /dev/null and b/trainers/__pycache__/cocoop.cpython-38.pyc differ diff --git a/trainers/__pycache__/coop.cpython-38.pyc b/trainers/__pycache__/coop.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d0215200263a7d88b51869a9a37ddf7939a397d Binary files /dev/null and b/trainers/__pycache__/coop.cpython-38.pyc differ diff --git a/trainers/__pycache__/dept_src.cpython-38.pyc b/trainers/__pycache__/dept_src.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f29a40b34a93ba49b72758333f1f13f180f181c Binary files /dev/null and b/trainers/__pycache__/dept_src.cpython-38.pyc differ diff --git a/trainers/__pycache__/elp_promptsrc.cpython-38.pyc b/trainers/__pycache__/elp_promptsrc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13b466ea1a4596d33f0acd51427615c2aed00aad Binary files /dev/null and b/trainers/__pycache__/elp_promptsrc.cpython-38.pyc differ diff --git a/trainers/__pycache__/imagenet_templates.cpython-38.pyc b/trainers/__pycache__/imagenet_templates.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d918e6c6e88049c10aa5f9fe390bed2a06f40fe9 Binary files /dev/null and b/trainers/__pycache__/imagenet_templates.cpython-38.pyc differ diff --git a/trainers/__pycache__/independentVL.cpython-38.pyc b/trainers/__pycache__/independentVL.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3cb0984e882e8b96e11a8ae751e205cadcad6c95 Binary files /dev/null and b/trainers/__pycache__/independentVL.cpython-38.pyc differ diff --git a/trainers/__pycache__/kgcoop.cpython-38.pyc b/trainers/__pycache__/kgcoop.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..83326d6d0b22ad8c2180d855cc90a03813e157b8 Binary files /dev/null and b/trainers/__pycache__/kgcoop.cpython-38.pyc differ diff --git a/trainers/__pycache__/maple.cpython-38.pyc b/trainers/__pycache__/maple.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60ef71f3af282613d1b57e4bb3d3e7e353e48c1b Binary files /dev/null and b/trainers/__pycache__/maple.cpython-38.pyc differ diff --git a/trainers/__pycache__/promptkd.cpython-38.pyc b/trainers/__pycache__/promptkd.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..829d7ed2b06de460c86728020f7ef44eab35cf65 Binary files /dev/null and b/trainers/__pycache__/promptkd.cpython-38.pyc differ diff --git a/trainers/__pycache__/promptsrc.cpython-38.pyc b/trainers/__pycache__/promptsrc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d0d7ce62bfe36f0a697fb9df90200476970a06b Binary files /dev/null and b/trainers/__pycache__/promptsrc.cpython-38.pyc differ 
diff --git a/trainers/__pycache__/supr.cpython-38.pyc b/trainers/__pycache__/supr.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ff81bceb67cc4c1fbaa0a6ee6e8cfb947ceccbe Binary files /dev/null and b/trainers/__pycache__/supr.cpython-38.pyc differ diff --git a/trainers/__pycache__/supr_dept.cpython-38.pyc b/trainers/__pycache__/supr_dept.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b21821bf6e922a86046232a93fda1732f639451e Binary files /dev/null and b/trainers/__pycache__/supr_dept.cpython-38.pyc differ diff --git a/trainers/__pycache__/supr_ens.cpython-38.pyc b/trainers/__pycache__/supr_ens.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aa43ade55711feab56813a17c16f76e4b6ebf3a5 Binary files /dev/null and b/trainers/__pycache__/supr_ens.cpython-38.pyc differ diff --git a/trainers/__pycache__/supr_promptsrc.cpython-38.pyc b/trainers/__pycache__/supr_promptsrc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7dd87a377c70f26a437455c53e11600acc38c57 Binary files /dev/null and b/trainers/__pycache__/supr_promptsrc.cpython-38.pyc differ diff --git a/trainers/__pycache__/supr_tcp.cpython-38.pyc b/trainers/__pycache__/supr_tcp.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0d6c33a8dc9b2d9e542082e30bd69984a2edd07c Binary files /dev/null and b/trainers/__pycache__/supr_tcp.cpython-38.pyc differ diff --git a/trainers/__pycache__/tcp.cpython-38.pyc b/trainers/__pycache__/tcp.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ffb00d056fd4544ec3c2c1bd7c832add7ead5871 Binary files /dev/null and b/trainers/__pycache__/tcp.cpython-38.pyc differ diff --git a/trainers/__pycache__/zsclip.cpython-38.pyc b/trainers/__pycache__/zsclip.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b37c48c573dec5b8871ffb40b4164804252f6e1 Binary files /dev/null and b/trainers/__pycache__/zsclip.cpython-38.pyc differ diff --git a/trainers/cocoop.py b/trainers/cocoop.py new file mode 100644 index 0000000000000000000000000000000000000000..1a12d321fd45f4b3f29888a809780ccfbfe79851 --- /dev/null +++ b/trainers/cocoop.py @@ -0,0 +1,318 @@ +import os.path as osp +from collections import OrderedDict +import math + +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_optimizer, build_lr_scheduler + +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer + +_tokenizer = _Tokenizer() + + +def load_clip_to_cpu(cfg): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + design_details = {"trainer": 'CoCoOp', + "vision_depth": 0, + "language_depth": 0, "vision_ctx": 0, + "language_ctx": 0} + model = clip.build_model(state_dict or model.state_dict(), design_details) + + return model + + +class TextEncoder(nn.Module): + def __init__(self, clip_model): + super().__init__() + self.transformer = clip_model.transformer + self.positional_embedding = 
clip_model.positional_embedding + self.ln_final = clip_model.ln_final + self.text_projection = clip_model.text_projection + self.dtype = clip_model.dtype + + def forward(self, prompts, tokenized_prompts): + x = prompts + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), tokenized_prompts.argmax(dim=-1)] @ self.text_projection + + return x + + +class PromptLearner(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + n_cls = len(classnames) + n_ctx = cfg.TRAINER.COCOOP.N_CTX + ctx_init = cfg.TRAINER.COCOOP.CTX_INIT + dtype = clip_model.dtype + ctx_dim = clip_model.ln_final.weight.shape[0] + vis_dim = clip_model.visual.output_dim + clip_imsize = clip_model.visual.input_resolution + cfg_imsize = cfg.INPUT.SIZE[0] + assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + + if ctx_init: + # use given words to initialize context vectors + ctx_init = ctx_init.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1: 1 + n_ctx, :] + prompt_prefix = ctx_init + else: + # random initialization + ctx_vectors = torch.empty(n_ctx, ctx_dim, dtype=dtype) + nn.init.normal_(ctx_vectors, std=0.02) + prompt_prefix = " ".join(["X"] * n_ctx) + + print(f'Initial context: "{prompt_prefix}"') + print(f"Number of context words (tokens): {n_ctx}") + + self.ctx = nn.Parameter(ctx_vectors) + + self.meta_net = nn.Sequential(OrderedDict([ + ("linear1", nn.Linear(vis_dim, vis_dim // 16)), + ("relu", nn.ReLU(inplace=True)), + ("linear2", nn.Linear(vis_dim // 16, ctx_dim)) + ])) + + if cfg.TRAINER.COCOOP.PREC == "fp16": + self.meta_net.half() + + classnames = [name.replace("_", " ") for name in classnames] + name_lens = [len(_tokenizer.encode(name)) for name in classnames] + prompts = [prompt_prefix + " " + name + "." 
for name in classnames] + + tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) # (n_cls, n_tkn) + with torch.no_grad(): + embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) + + # These token vectors will be saved when in save_model(), + # but they should be ignored in load_model() as we want to use + # those computed using the current class names + self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx:, :]) # CLS, EOS + + self.n_cls = n_cls + self.n_ctx = n_ctx + self.tokenized_prompts = tokenized_prompts # torch.Tensor + self.name_lens = name_lens + + def construct_prompts(self, ctx, prefix, suffix, label=None): + # dim0 is either batch_size (during training) or n_cls (during testing) + # ctx: context tokens, with shape of (dim0, n_ctx, ctx_dim) + # prefix: the sos token, with shape of (n_cls, 1, ctx_dim) + # suffix: remaining tokens, with shape of (n_cls, *, ctx_dim) + + if label is not None: + prefix = prefix[label] + suffix = suffix[label] + + prompts = torch.cat( + [ + prefix, # (dim0, 1, dim) + ctx, # (dim0, n_ctx, dim) + suffix, # (dim0, *, dim) + ], + dim=1, + ) + + return prompts + + def forward(self, im_features): + prefix = self.token_prefix + suffix = self.token_suffix + ctx = self.ctx # (n_ctx, ctx_dim) + bias = self.meta_net(im_features) # (batch, ctx_dim) + bias = bias.unsqueeze(1) # (batch, 1, ctx_dim) + ctx = ctx.unsqueeze(0) # (1, n_ctx, ctx_dim) + ctx_shifted = ctx + bias # (batch, n_ctx, ctx_dim) + + # Use instance-conditioned context tokens for all classes + prompts = [] + for ctx_shifted_i in ctx_shifted: + ctx_i = ctx_shifted_i.unsqueeze(0).expand(self.n_cls, -1, -1) + pts_i = self.construct_prompts(ctx_i, prefix, suffix) # (n_cls, n_tkn, ctx_dim) + prompts.append(pts_i) + prompts = torch.stack(prompts) + + return prompts + + +class CustomCLIP(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + self.prompt_learner = PromptLearner(cfg, classnames, clip_model) + self.tokenized_prompts = self.prompt_learner.tokenized_prompts + self.image_encoder = clip_model.visual + self.text_encoder = TextEncoder(clip_model) + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + + def forward(self, image, label=None): + tokenized_prompts = self.tokenized_prompts + logit_scale = self.logit_scale.exp() + + image_features = self.image_encoder(image.type(self.dtype)) + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + + prompts = self.prompt_learner(image_features) + + logits = [] + for pts_i, imf_i in zip(prompts, image_features): + text_features = self.text_encoder(pts_i, tokenized_prompts) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + l_i = logit_scale * imf_i @ text_features.t() + logits.append(l_i) + logits = torch.stack(logits) + + if self.prompt_learner.training: + return F.cross_entropy(logits, label) + + return logits + + +@TRAINER_REGISTRY.register() +class CoCoOp(TrainerX): + def check_cfg(self, cfg): + assert cfg.TRAINER.COCOOP.PREC in ["fp16", "fp32", "amp"] + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + + if cfg.TRAINER.COCOOP.PREC == "fp32" or cfg.TRAINER.COCOOP.PREC == "amp": + # CLIP's default precision is fp16 + clip_model.float() + + print("Building custom CLIP") + self.model = CustomCLIP(cfg, classnames, 
clip_model) + + print("Turning off gradients in both the image and the text encoder") + name_to_update = "prompt_learner" + + for name, param in self.model.named_parameters(): + if name_to_update not in name: + param.requires_grad_(False) + + # Double check + enabled = set() + for name, param in self.model.named_parameters(): + if param.requires_grad: + enabled.add(name) + print(f"Parameters to be updated: {enabled}") + + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model.prompt_learner, cfg.MODEL.INIT_WEIGHTS) + + self.model.to(self.device) + # NOTE: only give prompt_learner to the optimizer + self.optim = build_optimizer(self.model.prompt_learner, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("prompt_learner", self.model.prompt_learner, self.optim, self.sched) + + self.scaler = GradScaler() if cfg.TRAINER.COCOOP.PREC == "amp" else None + + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + device_count = torch.cuda.device_count() + if device_count > 1: + print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!") + self.model = nn.DataParallel(self.model) + + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + + model = self.model + optim = self.optim + scaler = self.scaler + + prec = self.cfg.TRAINER.COCOOP.PREC + if prec == "amp": + with autocast(): + loss = model(image, label) + optim.zero_grad() + scaler.scale(loss).backward() + scaler.step(optim) + scaler.update() + else: + loss = model(image, label) + optim.zero_grad() + loss.backward() + optim.step() + + loss_summary = {"loss": loss.item()} + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model is given") + return + + names = self.get_model_names() + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + + # Ignore fixed token vectors + if "token_prefix" in state_dict: + del state_dict["token_prefix"] + + if "token_suffix" in state_dict: + del state_dict["token_suffix"] + + print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) + # set strict=False + self._models[name].load_state_dict(state_dict, strict=False) \ No newline at end of file diff --git a/trainers/coop.py b/trainers/coop.py new file mode 100644 index 0000000000000000000000000000000000000000..619536ed34b2f618908f30d1c6cc0d38f98d1be2 --- /dev/null +++ b/trainers/coop.py @@ -0,0 +1,328 @@ +import os.path as osp + +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import 
build_optimizer, build_lr_scheduler + +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer + +_tokenizer = _Tokenizer() + + +def load_clip_to_cpu(cfg): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + design_details = {"trainer": 'CoOp', + "vision_depth": 0, + "language_depth": 0, "vision_ctx": 0, + "language_ctx": 0} + model = clip.build_model(state_dict or model.state_dict(), design_details) + + return model + + +class TextEncoder(nn.Module): + def __init__(self, clip_model): + super().__init__() + self.transformer = clip_model.transformer + self.positional_embedding = clip_model.positional_embedding + self.ln_final = clip_model.ln_final + self.text_projection = clip_model.text_projection + self.dtype = clip_model.dtype + + def forward(self, prompts, tokenized_prompts): + x = prompts + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), tokenized_prompts.argmax(dim=-1)] @ self.text_projection + + return x + + +class PromptLearner(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + n_cls = len(classnames) + n_ctx = cfg.TRAINER.COOP.N_CTX + ctx_init = cfg.TRAINER.COOP.CTX_INIT + dtype = clip_model.dtype + ctx_dim = clip_model.ln_final.weight.shape[0] + clip_imsize = clip_model.visual.input_resolution + cfg_imsize = cfg.INPUT.SIZE[0] + assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + + if ctx_init: + # use given words to initialize context vectors + ctx_init = ctx_init.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] + prompt_prefix = ctx_init + + else: + # random initialization + if cfg.TRAINER.COOP.CSC: + print("Initializing class-specific contexts") + ctx_vectors = torch.empty(n_cls, n_ctx, ctx_dim, dtype=dtype) + else: + print("Initializing a generic context") + ctx_vectors = torch.empty(n_ctx, ctx_dim, dtype=dtype) + nn.init.normal_(ctx_vectors, std=0.02) + prompt_prefix = " ".join(["X"] * n_ctx) + + print(f'Initial context: "{prompt_prefix}"') + print(f"Number of context words (tokens): {n_ctx}") + + self.ctx = nn.Parameter(ctx_vectors) # to be optimized + + classnames = [name.replace("_", " ") for name in classnames] + name_lens = [len(_tokenizer.encode(name)) for name in classnames] + prompts = [prompt_prefix + " " + name + "." 
for name in classnames] + + tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) + with torch.no_grad(): + embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) + + # These token vectors will be saved when in save_model(), + # but they should be ignored in load_model() as we want to use + # those computed using the current class names + self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx :, :]) # CLS, EOS + + self.n_cls = n_cls + self.n_ctx = n_ctx + self.tokenized_prompts = tokenized_prompts # torch.Tensor + self.name_lens = name_lens + self.class_token_position = cfg.TRAINER.COOP.CLASS_TOKEN_POSITION + + def forward(self): + ctx = self.ctx + if ctx.dim() == 2: + ctx = ctx.unsqueeze(0).expand(self.n_cls, -1, -1) + + prefix = self.token_prefix + suffix = self.token_suffix + + if self.class_token_position == "end": + prompts = torch.cat( + [ + prefix, # (n_cls, 1, dim) + ctx, # (n_cls, n_ctx, dim) + suffix, # (n_cls, *, dim) + ], + dim=1, + ) + + elif self.class_token_position == "middle": + half_n_ctx = self.n_ctx // 2 + prompts = [] + for i in range(self.n_cls): + name_len = self.name_lens[i] + prefix_i = prefix[i : i + 1, :, :] + class_i = suffix[i : i + 1, :name_len, :] + suffix_i = suffix[i : i + 1, name_len:, :] + ctx_i_half1 = ctx[i : i + 1, :half_n_ctx, :] + ctx_i_half2 = ctx[i : i + 1, half_n_ctx:, :] + prompt = torch.cat( + [ + prefix_i, # (1, 1, dim) + ctx_i_half1, # (1, n_ctx//2, dim) + class_i, # (1, name_len, dim) + ctx_i_half2, # (1, n_ctx//2, dim) + suffix_i, # (1, *, dim) + ], + dim=1, + ) + prompts.append(prompt) + prompts = torch.cat(prompts, dim=0) + + elif self.class_token_position == "front": + prompts = [] + for i in range(self.n_cls): + name_len = self.name_lens[i] + prefix_i = prefix[i : i + 1, :, :] + class_i = suffix[i : i + 1, :name_len, :] + suffix_i = suffix[i : i + 1, name_len:, :] + ctx_i = ctx[i : i + 1, :, :] + prompt = torch.cat( + [ + prefix_i, # (1, 1, dim) + class_i, # (1, name_len, dim) + ctx_i, # (1, n_ctx, dim) + suffix_i, # (1, *, dim) + ], + dim=1, + ) + prompts.append(prompt) + prompts = torch.cat(prompts, dim=0) + + else: + raise ValueError + + return prompts + + +class CustomCLIP(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + self.prompt_learner = PromptLearner(cfg, classnames, clip_model) + self.tokenized_prompts = self.prompt_learner.tokenized_prompts + self.image_encoder = clip_model.visual + self.text_encoder = TextEncoder(clip_model) + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + + def forward(self, image): + image_features = self.image_encoder(image.type(self.dtype)) + + prompts = self.prompt_learner() + tokenized_prompts = self.tokenized_prompts + text_features = self.text_encoder(prompts, tokenized_prompts) + + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + logit_scale = self.logit_scale.exp() + logits = logit_scale * image_features @ text_features.t() + + return logits + + +@TRAINER_REGISTRY.register() +class CoOp(TrainerX): + """Context Optimization (CoOp). 
+ + Learning to Prompt for Vision-Language Models + https://arxiv.org/abs/2109.01134 + """ + + def check_cfg(self, cfg): + assert cfg.TRAINER.COOP.PREC in ["fp16", "fp32", "amp"] + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + + if cfg.TRAINER.COOP.PREC == "fp32" or cfg.TRAINER.COOP.PREC == "amp": + # CLIP's default precision is fp16 + clip_model.float() + + print("Building custom CLIP") + self.model = CustomCLIP(cfg, classnames, clip_model) + + print("Turning off gradients in both the image and the text encoder") + for name, param in self.model.named_parameters(): + if "prompt_learner" not in name: + param.requires_grad_(False) + + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model.prompt_learner, cfg.MODEL.INIT_WEIGHTS) + + self.model.to(self.device) + # NOTE: only give prompt_learner to the optimizer + self.optim = build_optimizer(self.model.prompt_learner, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("prompt_learner", self.model.prompt_learner, self.optim, self.sched) + + self.scaler = GradScaler() if cfg.TRAINER.COOP.PREC == "amp" else None + + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + device_count = torch.cuda.device_count() + if device_count > 1: + print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!") + self.model = nn.DataParallel(self.model) + + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + + prec = self.cfg.TRAINER.COOP.PREC + if prec == "amp": + with autocast(): + output = self.model(image) + loss = F.cross_entropy(output, label) + self.optim.zero_grad() + self.scaler.scale(loss).backward() + self.scaler.step(self.optim) + self.scaler.update() + else: + output = self.model(image) + loss = F.cross_entropy(output, label) + self.model_backward_and_update(loss) + + loss_summary = { + "loss": loss.item(), + "acc": compute_accuracy(output, label)[0].item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model is given") + return + + names = self.get_model_names() + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + + # Ignore fixed token vectors + if "token_prefix" in state_dict: + del state_dict["token_prefix"] + + if "token_suffix" in state_dict: + del state_dict["token_suffix"] + + print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) + # set strict=False + self._models[name].load_state_dict(state_dict, strict=False) \ No newline at end of file diff --git a/trainers/elp_promptsrc.py b/trainers/elp_promptsrc.py new file mode 100644 index 
0000000000000000000000000000000000000000..4336fcb7541e4aa159b8c91c57527f1f1d4a5b97 --- /dev/null +++ b/trainers/elp_promptsrc.py @@ -0,0 +1,508 @@ +import copy +import numpy as np +import os +import os.path as osp +import torch +import torch.nn.functional as F +from torch import nn +from torch.cuda.amp import GradScaler, autocast + +from dassl.engine import TRAINER_REGISTRY +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_lr_scheduler + + +from .promptsrc import CustomCLIP as CustomCLIP_, PromptSRC, load_clip_to_cpu + +class FiLM(nn.Module): + def __init__(self, + dim, + bias=True, + use_sigmoid=False): + super().__init__() + self.scale = nn.Parameter(torch.ones(dim)) + self.bias = nn.Parameter(torch.zeros(dim)) if bias else None + self.has_bias = bias + self.use_sigmoid = use_sigmoid + + def forward(self, x): + scale = self.scale.unsqueeze(0).type(x.dtype) + bias = self.bias.unsqueeze(0).type(x.dtype) if self.has_bias else None + + x = scale * x + if bias is not None: + x = x + bias + + if self.use_sigmoid: + return x.sigmoid() + + return x + + + +class CustomCLIP(CustomCLIP_): + def __init__(self, cfg, classnames, clip_model): + super().__init__(cfg, classnames, clip_model) + self.subsample_classes = cfg.DATASET.SUBSAMPLE_CLASSES + self.dataset = cfg.DATASET.NAME + self.lp_cfg = cfg.TRAINER.LINEAR_PROBE + self.film_cfg = cfg.TRAINER.FILM + + clip_dim = clip_model.text_projection.size(1) + + film_cfg = self.film_cfg + + if film_cfg.LINEAR_PROBE: + self.film_lp_img = FiLM(clip_dim) + self.film_lp_text = FiLM(clip_dim) + + if (self.subsample_classes == 'base') \ + or (self.subsample_classes == 'all' and 'ImageNet' in self.dataset): + self.linear_probe_proj = nn.Linear(clip_dim, len(classnames)).type(self.dtype) + else: + self.linear_probe_proj = nn.Identity() + + def forward(self, img, labels=None): + if (self.subsample_classes == 'base') \ + or (self.subsample_classes == 'all' and 'ImageNet' in self.dataset): + return self._forward_base(img, labels) + else: + return self._forward_new(img) + + def _forward_base(self, img, labels=None): + text_feats, img_feats = self._forward_feats(img) + + if self.prompt_learner.training: + zs_text_feats, zs_img_feats = self._forward_zsfeats(img) + logits, zs_logits = self._forward_logits_similarity(text_feats, img_feats, + zs_text_feats, zs_img_feats) + logits_lp, labels_lp = self._forward_logits_linear_probe(text_feats, img_feats, labels, + zs_text_feats, zs_img_feats) + + text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True) + img_feats = img_feats / img_feats.norm(dim=-1, keepdim=True) + zs_text_feats = zs_text_feats / zs_text_feats.norm(dim=-1, keepdim=True) + zs_img_feats = zs_img_feats / zs_img_feats.norm(dim=-1, keepdim=True) + + return self._loss(logits, labels, logits_lp, labels_lp), \ + text_feats, zs_text_feats, zs_img_feats, img_feats, zs_logits, logits + else: + logits = self._forward_logits_similarity(text_feats, img_feats) + logits_lp, _ = self._forward_logits_linear_probe(text_feats, img_feats) + + if not self.lp_cfg.TEST_TIME_FUSION: + return logits_lp + + lp_weight = self.lp_cfg.WEIGHT + logits = (1 - lp_weight) * logits + lp_weight * logits_lp + return logits + + def _forward_new(self, img): + assert not self.prompt_learner.training + + text_feats, img_feats = self._forward_feats(img) + logits = self._forward_logits_similarity(text_feats, img_feats) + return logits + + def _forward_feats(self, img): + tokenized_prompts = self.tokenized_prompts + prompts = self.prompt_learner() + + 
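+        # Encode the learned prompts into per-class text features and the image into visual
+        # features; both are returned un-normalized (callers normalize where needed).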
text_features = self.text_encoder(prompts, tokenized_prompts) + image_features = self.image_encoder(img.type(self.dtype)) + return text_features, image_features + + def _forward_zsfeats(self, img): + fixed_embeddings = self.prompt_learner.fixed_embeddings + + with torch.no_grad(): + zero_shot_features = self.prompt_learner.ZS_image_encoder(img.type(self.dtype)) + + return fixed_embeddings, zero_shot_features + + def _forward_logits_similarity(self, text_feats, img_feats, zs_text_feats=None, zs_img_feats=None): + text_feats_norm = text_feats / text_feats.norm(dim=-1, keepdim=True) + img_feats_norm = img_feats / img_feats.norm(dim=-1, keepdim=True) + + logit_scale = self.logit_scale.exp() + logits = logit_scale * img_feats_norm @ text_feats_norm.t() + + if zs_text_feats is not None and zs_img_feats is not None: + zs_text_feats_norm = zs_text_feats / zs_text_feats.norm(dim=-1, keepdim=True) + + with torch.no_grad(): + zs_img_feats_norm = zs_img_feats / zs_img_feats.norm(dim=-1, keepdim=True) + zs_logits = logit_scale * zs_img_feats_norm.cuda() @ zs_text_feats_norm.half().cuda().t() + + return logits, zs_logits + else: + return logits + + def _forward_logits_linear_probe(self, text_feats, img_feats, labels=None, + zs_text_feats=None, zs_img_feats=None): + if self.film_cfg.LINEAR_PROBE: + text_feats = self.film_lp_text(text_feats) + img_feats = self.film_lp_img(img_feats) + + if labels is None: + all_feats = img_feats + all_labels = labels + else: + text_feats = text_feats[labels] + all_feats = torch.cat([text_feats, img_feats]) + all_labels = torch.cat([labels, labels]) + + all_logits = self.linear_probe_proj(all_feats) + return all_logits, all_labels + + def _loss(self, logits, labels, logits_lp, labels_lp): + loss_cls = F.cross_entropy(logits, labels) + loss_cls_lp = F.cross_entropy(logits_lp, labels_lp) + + # cls_weight = self.lp_cfg.CLS_WEIGHT + # lp_weight = self.lp_cfg.WEIGHT + lp_weight = self.lp_cfg.WEIGHT + loss = (1 - lp_weight) * loss_cls + lp_weight * loss_cls_lp + # loss = cls_weight * loss_cls + lp_weight * loss_cls_lp + return loss + + +@TRAINER_REGISTRY.register() +class ExtrasLinearProbePromptSRC(PromptSRC): + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + + if cfg.TRAINER.PROMPTSRC.PREC == "fp32" or cfg.TRAINER.PROMPTSRC.PREC == "amp": + # CLIP's default precision is fp16 + clip_model.float() + + print("Building custom CLIP") + self.model = CustomCLIP(cfg, classnames, clip_model) + + print("Turning off gradients in both the image and the text encoder") + # names_to_update = cfg.TRAINER.NAMES_TO_UPDATE + names_to_update = ['prompt_learner', 'linear_probe', 'film','VPT'] + for name, param in self.model.named_parameters(): + update = False + + for name_to_update in names_to_update: + if name_to_update in name: + update = True + break + + if "ZS_image_encoder" in name: + update = False + + param.requires_grad_(update) + + enabled = [] + for name, param in self.model.named_parameters(): + if param.requires_grad: + enabled.append(name) + print(f"Parameters to be updated: {list(sorted(enabled))}") + + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model, cfg.MODEL.INIT_WEIGHTS) + + self.model.to(self.device) + # NOTE: only give prompt_learner to the optimizer + self.optim, infos = build_optimizer(self.model, cfg.OPTIM) + + if infos is not None: + print('Learning rate of parameters:') + for info in infos: + print('lr: {}, layers: 
{}'.format(info['lr'], info['layers'])) + + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("VLPromptLearner", self.model, self.optim, self.sched) + + # Cosine scheduler + self.total_epochs = cfg.OPTIM.MAX_EPOCH + self.step_counter = 1 + N = cfg.OPTIM.MAX_EPOCH + mean = cfg.TRAINER.PROMPTSRC.GPA_MEAN + stdev = cfg.TRAINER.PROMPTSRC.GPA_STD + gauss = self.get_gauss(mean, stdev) + self.gauss = np.array([gauss(a) for a in range(1, N + 1)]) + self.gauss = self.gauss / sum(self.gauss) + self.scaler = GradScaler() if cfg.TRAINER.PROMPTSRC.PREC == "amp" else None + + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + device_count = torch.cuda.device_count() + if device_count > 1: + print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!") + self.model = nn.DataParallel(self.model) + + # Keep model with GPA + self.previous_model_gpa = None + + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + + model = self.model + optim = self.optim + scaler = self.scaler + + prec = self.cfg.TRAINER.PROMPTSRC.PREC + if prec == "amp": + with autocast(): + loss = model(image, label) + optim.zero_grad() + scaler.scale(loss).backward() + scaler.step(optim) + scaler.update() + else: + loss_ce, normalized_text_features, zs_clip_text_embeddings, zs_image_embedd, image_ft, \ + zero_shot_logits, logits = model(image, label) + # Calculate the L_SCL_text loss + loss_scl_text = F.l1_loss(normalized_text_features, zs_clip_text_embeddings.cuda(), + reduction='mean') * self.cfg.TRAINER.PROMPTSRC.TEXT_LOSS_WEIGHT + # Calculate the L_SCL_image loss + loss_scl_image = F.l1_loss(image_ft, zs_image_embedd.cuda(), + reduction='mean') * self.cfg.TRAINER.PROMPTSRC.IMAGE_LOSS_WEIGHT + # Now calculate L_SCL_logits + L_SCL_logits = F.kl_div( + F.log_softmax(logits / 1, dim=1), + F.log_softmax(zero_shot_logits / 1, dim=1), + reduction='sum', + log_target=True + ) * (1 * 1) / logits.numel() + L_SCL = (L_SCL_logits + loss_scl_text + loss_scl_image) + loss = (loss_ce + L_SCL) + optim.zero_grad() + loss.backward() + optim.step() + + loss_summary = {"loss": loss.item()} + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + # Means one epoch is completed, perform GPA + self.step_counter = self.step_counter + 1 + current_epoch_weight = self.gauss[self.step_counter - 2] + current_model_weights = copy.deepcopy(model.state_dict()) + weighted_state_dict = self.state_dict_weighting(current_model_weights, current_epoch_weight) + if self.previous_model_gpa is None: + self.previous_model_gpa = weighted_state_dict + else: + self.previous_model_gpa = self.state_dict_add(weighted_state_dict, self.previous_model_gpa) + + if self.step_counter == self.model.total_epochs + 1: + print("Using GPA model for final inference...") + model.load_state_dict(self.previous_model_gpa) + self.model.load_state_dict(self.previous_model_gpa) + + return loss_summary + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model is given") + return + + names = self.get_model_names() + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + if epoch < 0: + all_model_files = os.listdir(osp.join(directory, name)) + all_model_files = [file_ for file_ in all_model_files if file_ != 'checkpoint'] + model_epochs = 
[int(file_.split('-')[-1]) for file_ in all_model_files] + last_epoch = max(model_epochs) + model_file = 'model.pth.tar-' + str(last_epoch) + + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + + # Ignore fixed token vectors + if "prompt_learner.token_prefix" in state_dict: + del state_dict["prompt_learner.token_prefix"] + + if "prompt_learner.token_suffix" in state_dict: + del state_dict["prompt_learner.token_suffix"] + + print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) + + if self.cfg.DATASET.NAME in ['ImageNetA', 'ImageNetR']: + from datasets.imagenet import ImageNet + from dassl.utils import listdir_nohidden + + dataset = self.dm.dataset + text_file = osp.join(dataset.dataset_dir, "classnames.txt") + all_folders = ImageNet.read_classnames(text_file).keys() + + TO_BE_IGNORED = ["README.txt"] + folders = listdir_nohidden(dataset.image_dir, sort=True) + folders = [f for f in folders if f not in TO_BE_IGNORED] + is_reserves = [f in folders for f in all_folders] + + print(f'State dict is CLIPPED to match the shape of target dataset {self.cfg.DATASET.NAME}!') + state_dict['linear_probe_proj.weight'] = state_dict['linear_probe_proj.weight'][is_reserves] + state_dict['linear_probe_proj.bias'] = state_dict['linear_probe_proj.bias'][is_reserves] + + # set strict=False + self._models[name].load_state_dict(state_dict, strict=False) + + + +""" modified from dassl.optim """ +import warnings +import torch +import torch.nn as nn + +from dassl.optim.radam import RAdam + +AVAI_OPTIMS = ['adam', 'amsgrad', 'sgd', 'rmsprop', 'radam', 'adamw'] + + +def build_optimizer(model, optim_cfg, param_groups=None): + optim = optim_cfg.NAME + lr = optim_cfg.LR + weight_decay = optim_cfg.WEIGHT_DECAY + momentum = optim_cfg.MOMENTUM + sgd_dampening = optim_cfg.SGD_DAMPNING + sgd_nesterov = optim_cfg.SGD_NESTEROV + rmsprop_alpha = optim_cfg.RMSPROP_ALPHA + adam_beta1 = optim_cfg.ADAM_BETA1 + adam_beta2 = optim_cfg.ADAM_BETA2 + staged_lr = optim_cfg.STAGED_LR + new_layers = optim_cfg.NEW_LAYERS + base_lr_mult = optim_cfg.BASE_LR_MULT + + if optim not in AVAI_OPTIMS: + raise ValueError( + f'optim must be one of {AVAI_OPTIMS}, but got {optim}' + ) + + if param_groups is not None and staged_lr: + warnings.warn( + 'staged_lr will be ignored, if you need to use staged_lr, ' + 'please bind it with param_groups yourself.' 
+ ) + + if param_groups is None: + if staged_lr: + # modify the function of lr_mult + exp = optim_cfg.LR_EXP + lr *= exp + base_lr_mult /= exp + + if not isinstance(model, nn.Module): + raise TypeError( + 'When staged_lr is True, model given to ' + 'build_optimizer() must be an instance of nn.Module' + ) + + if isinstance(model, nn.DataParallel): + model = model.module + + if isinstance(new_layers, str): + if new_layers is None: + warnings.warn('new_layers is empty (staged_lr is useless)') + new_layers = [new_layers] + + base_params, new_params = [], [] + base_layers, new_layers_ = [], [] + + for name, module in model.named_children(): + is_new = False + + for layer in new_layers: + if layer in name: + is_new = True + break + + if is_new: + new_params += [p for p in module.parameters()] + new_layers_.append(name) + else: + base_params += [p for p in module.parameters()] + base_layers.append(name) + + param_groups = [{'params': base_params, + 'lr': lr * base_lr_mult}, + {'params': new_params}] + + # return lr of each layer + infos = [{'layers': base_layers, + 'lr': lr * base_lr_mult}, + {'layers': new_layers_, + 'lr': lr}] + else: + if isinstance(model, nn.Module): + param_groups = model.parameters() + else: + param_groups = model + + infos = None + + if optim == 'adam': + optimizer = torch.optim.Adam( + param_groups, + lr=lr, + weight_decay=weight_decay, + betas=(adam_beta1, adam_beta2), + ) + + elif optim == 'amsgrad': + optimizer = torch.optim.Adam( + param_groups, + lr=lr, + weight_decay=weight_decay, + betas=(adam_beta1, adam_beta2), + amsgrad=True, + ) + + elif optim == 'sgd': + optimizer = torch.optim.SGD( + param_groups, + lr=lr, + momentum=momentum, + weight_decay=weight_decay, + dampening=sgd_dampening, + nesterov=sgd_nesterov, + ) + + elif optim == 'rmsprop': + optimizer = torch.optim.RMSprop( + param_groups, + lr=lr, + momentum=momentum, + weight_decay=weight_decay, + alpha=rmsprop_alpha, + ) + + elif optim == 'radam': + optimizer = RAdam( + param_groups, + lr=lr, + weight_decay=weight_decay, + betas=(adam_beta1, adam_beta2), + ) + + elif optim == 'adamw': + optimizer = torch.optim.AdamW( + param_groups, + lr=lr, + weight_decay=weight_decay, + betas=(adam_beta1, adam_beta2), + ) + else: + raise NotImplementedError(f'Optimizer {optim} not implemented yet!') + + return optimizer, infos \ No newline at end of file diff --git a/trainers/imagenet_templates.py b/trainers/imagenet_templates.py new file mode 100644 index 0000000000000000000000000000000000000000..73dac7b4c6d80c008a1063d59871b90ffbf31fbe --- /dev/null +++ b/trainers/imagenet_templates.py @@ -0,0 +1,76 @@ +# source: https://github.com/openai/CLIP/blob/main/notebooks/Prompt_Engineering_for_ImageNet.ipynb + +IMAGENET_TEMPLATES = [ + "a photo of a {}.", + "a bad photo of a {}.", + "a photo of many {}.", + "a sculpture of a {}.", + "a photo of the hard to see {}.", + "a low resolution photo of the {}.", + "a rendering of a {}.", + "graffiti of a {}.", + "a bad photo of the {}.", + "a cropped photo of the {}.", + "a tattoo of a {}.", + "the embroidered {}.", + "a photo of a hard to see {}.", + "a bright photo of a {}.", + "a photo of a clean {}.", + "a photo of a dirty {}.", + "a dark photo of the {}.", + "a drawing of a {}.", + "a photo of my {}.", + "the plastic {}.", + "a photo of the cool {}.", + "a close-up photo of a {}.", + "a black and white photo of the {}.", + "a painting of the {}.", + "a painting of a {}.", + "a pixelated photo of the {}.", + "a sculpture of the {}.", + "a bright photo of the {}.", + "a cropped 
photo of a {}.", + "a plastic {}.", + "a photo of the dirty {}.", + "a jpeg corrupted photo of a {}.", + "a blurry photo of the {}.", + "a photo of the {}.", + "a good photo of the {}.", + "a rendering of the {}.", + "a {} in a video game.", + "a photo of one {}.", + "a doodle of a {}.", + "a close-up photo of the {}.", + "the origami {}.", + "the {} in a video game.", + "a sketch of a {}.", + "a doodle of the {}.", + "a origami {}.", + "a low resolution photo of a {}.", + "the toy {}.", + "a rendition of the {}.", + "a photo of the clean {}.", + "a photo of a large {}.", + "a rendition of a {}.", + "a photo of a nice {}.", + "a photo of a weird {}.", + "a blurry photo of a {}.", + "a cartoon {}.", + "art of a {}.", + "a sketch of the {}.", + "a embroidered {}.", + "a pixelated photo of a {}.", + "itap of the {}.", +] + + + +IMAGENET_TEMPLATES_SELECT = [ + "itap of a {}.", + "a bad photo of the {}.", + "a origami {}.", + "a photo of the large {}.", + "a {} in a video game.", + "art of the {}.", + "a photo of the small {}.", +] \ No newline at end of file diff --git a/trainers/independentVL.py b/trainers/independentVL.py new file mode 100644 index 0000000000000000000000000000000000000000..8bdfd2e525f46ee381206f4e182677a156130279 --- /dev/null +++ b/trainers/independentVL.py @@ -0,0 +1,301 @@ +import os.path as osp + +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_optimizer, build_lr_scheduler + +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer + +_tokenizer = _Tokenizer() + + +def load_clip_to_cpu(cfg): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + design_details = {"trainer": 'IVLP', + "vision_depth": cfg.TRAINER.IVLP.PROMPT_DEPTH_VISION, + "language_depth": cfg.TRAINER.IVLP.PROMPT_DEPTH_TEXT, "vision_ctx": cfg.TRAINER.IVLP.N_CTX_VISION, + "language_ctx": cfg.TRAINER.IVLP.N_CTX_TEXT} + model = clip.build_model(state_dict or model.state_dict(), design_details) + + return model + + +class TextEncoder(nn.Module): + def __init__(self, clip_model): + super().__init__() + self.transformer = clip_model.transformer + self.positional_embedding = clip_model.positional_embedding + self.ln_final = clip_model.ln_final + self.text_projection = clip_model.text_projection + self.dtype = clip_model.dtype + + def forward(self, prompts, tokenized_prompts): + x = prompts + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), tokenized_prompts.argmax(dim=-1)] @ self.text_projection + + return x + + +class VLPromptLearner(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + n_cls = len(classnames) + # Make sure Language depth >= 1 + assert cfg.TRAINER.IVLP.PROMPT_DEPTH_TEXT >= 1, "In Independent VL prompting, Language prompt depth should be >=1" \ 
+ "\nPlease use VPT trainer if you want to learn only vision " \ + "branch " + n_ctx = cfg.TRAINER.IVLP.N_CTX_TEXT + ctx_init = cfg.TRAINER.IVLP.CTX_INIT + dtype = clip_model.dtype + ctx_dim = clip_model.ln_final.weight.shape[0] + vis_dim = clip_model.visual.output_dim + clip_imsize = clip_model.visual.input_resolution + cfg_imsize = cfg.INPUT.SIZE[0] + assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + + if ctx_init and (n_ctx) <= 4: + # Use given words to initialize context vectors + ctx_init = ctx_init.replace("_", " ") + n_ctx = n_ctx + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1: 1 + n_ctx, :] + prompt_prefix = ctx_init + else: + # Random initialization + ctx_vectors = torch.empty(n_ctx, ctx_dim, dtype=dtype) + nn.init.normal_(ctx_vectors, std=0.02) + prompt_prefix = " ".join(["X"] * n_ctx) + print(f"Independent V-L design") + print(f'Initial text context: "{prompt_prefix}"') + print(f"Number of context words (tokens) for Language prompting: {n_ctx}") + print(f"Number of context words (tokens) for Vision prompting: {cfg.TRAINER.IVLP.N_CTX_VISION}") + self.ctx = nn.Parameter(ctx_vectors) + + classnames = [name.replace("_", " ") for name in classnames] + name_lens = [len(_tokenizer.encode(name)) for name in classnames] + prompts = [prompt_prefix + " " + name + "." for name in classnames] + + tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) # (n_cls, n_tkn) + with torch.no_grad(): + embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) + + # These token vectors will be saved when in save_model(), + # but they should be ignored in load_model() as we want to use + # those computed using the current class names + self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx:, :]) # CLS, EOS + + self.n_cls = n_cls + self.n_ctx = n_ctx + self.tokenized_prompts = tokenized_prompts # torch.Tensor + self.name_lens = name_lens + + def construct_prompts(self, ctx, prefix, suffix, label=None): + # dim0 is either batch_size (during training) or n_cls (during testing) + # ctx: context tokens, with shape of (dim0, n_ctx, ctx_dim) + # prefix: the sos token, with shape of (n_cls, 1, ctx_dim) + # suffix: remaining tokens, with shape of (n_cls, *, ctx_dim) + + if label is not None: + prefix = prefix[label] + suffix = suffix[label] + + prompts = torch.cat( + [ + prefix, # (dim0, 1, dim) + ctx, # (dim0, n_ctx, dim) + suffix, # (dim0, *, dim) + ], + dim=1, + ) + + return prompts + + def forward(self): + ctx = self.ctx + if ctx.dim() == 2: + ctx = ctx.unsqueeze(0).expand(self.n_cls, -1, -1) + + prefix = self.token_prefix + suffix = self.token_suffix + prompts = self.construct_prompts(ctx, prefix, suffix) + + return prompts + + +class CustomCLIP(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + self.prompt_learner = VLPromptLearner(cfg, classnames, clip_model) + self.tokenized_prompts = self.prompt_learner.tokenized_prompts + self.image_encoder = clip_model.visual + self.text_encoder = TextEncoder(clip_model) + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + + def forward(self, image, label=None): + tokenized_prompts = self.tokenized_prompts + logit_scale = self.logit_scale.exp() + + prompts = self.prompt_learner() + text_features = self.text_encoder(prompts, tokenized_prompts) + 
image_features = self.image_encoder(image.type(self.dtype)) + + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + logits = logit_scale * image_features @ text_features.t() + + if self.prompt_learner.training: + return F.cross_entropy(logits, label) + + return logits + + +@TRAINER_REGISTRY.register() +class IVLP(TrainerX): + def check_cfg(self, cfg): + assert cfg.TRAINER.IVLP.PREC in ["fp16", "fp32", "amp"] + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + + if cfg.TRAINER.IVLP.PREC == "fp32" or cfg.TRAINER.IVLP.PREC == "amp": + # CLIP's default precision is fp16 + clip_model.float() + + print("Building custom CLIP") + self.model = CustomCLIP(cfg, classnames, clip_model) + + print("Turning off gradients in both the image and the text encoder") + name_to_update = "prompt_learner" + + for name, param in self.model.named_parameters(): + if name_to_update not in name: + # Make sure that VPT prompts are updated + if "VPT" in name: + param.requires_grad_(True) + else: + param.requires_grad_(False) + + # Double check + enabled = set() + for name, param in self.model.named_parameters(): + if param.requires_grad: + enabled.add(name) + print(f"Parameters to be updated: {enabled}") + + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model, cfg.MODEL.INIT_WEIGHTS) + + self.model.to(self.device) + # NOTE: only give prompt_learner to the optimizer + self.optim = build_optimizer(self.model, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("VLPromptLearner", self.model, self.optim, self.sched) + + self.scaler = GradScaler() if cfg.TRAINER.IVLP.PREC == "amp" else None + + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + device_count = torch.cuda.device_count() + if device_count > 1: + print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!") + self.model = nn.DataParallel(self.model) + + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + + model = self.model + optim = self.optim + scaler = self.scaler + + prec = self.cfg.TRAINER.IVLP.PREC + if prec == "amp": + with autocast(): + loss = model(image, label) + optim.zero_grad() + scaler.scale(loss).backward() + scaler.step(optim) + scaler.update() + else: + loss = model(image, label) + optim.zero_grad() + loss.backward() + optim.step() + + loss_summary = {"loss": loss.item()} + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model is given") + return + + names = self.get_model_names() + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = 
checkpoint["epoch"] + + # Ignore fixed token vectors + if "prompt_learner.token_prefix" in state_dict: + del state_dict["prompt_learner.token_prefix"] + + if "prompt_learner.token_suffix" in state_dict: + del state_dict["prompt_learner.token_suffix"] + + print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) + # set strict=False + self._models[name].load_state_dict(state_dict, strict=False) \ No newline at end of file diff --git a/trainers/kgcoop.py b/trainers/kgcoop.py new file mode 100644 index 0000000000000000000000000000000000000000..ae820f0d61c1c66d42efe6404cdca6b630145f15 --- /dev/null +++ b/trainers/kgcoop.py @@ -0,0 +1,375 @@ +import os.path as osp + +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast +from collections import OrderedDict + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_optimizer, build_lr_scheduler + +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer + +_tokenizer = _Tokenizer() + + +def load_clip_to_cpu(cfg): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + design_details = {"trainer": 'CoOp', + "vision_depth": 0, + "language_depth": 0, "vision_ctx": 0, + "language_ctx": 0} + model = clip.build_model(state_dict or model.state_dict(), design_details) + + return model + + +CUSTOM_TEMPLATES = { + "OxfordPets": "a photo of a {}, a type of pet.", + "OxfordFlowers": "a photo of a {}, a type of flower.", + "FGVCAircraft": "a photo of a {}, a type of aircraft.", + "DescribableTextures": "a photo of a {}, a type of texture.", + "EuroSAT": "a centered satellite photo of {}.", + #"EuroSAT": "a photo of a {}.", + "StanfordCars": "a photo of a {}.", + "Food101": "a photo of {}, a type of food.", + "SUN397": "a photo of a {}.", + "Caltech101": "a photo of a {}.", + "UCF101": "a photo of a person doing {}.", + "ImageNet": "a photo of a {}.", + "ImageNetSketch": "a photo of a {}.", + "ImageNetV2": "a photo of a {}.", + "ImageNetA": "a photo of a {}.", + "ImageNetR": "a photo of a {}.", +} + + + + +class TextEncoder(nn.Module): + def __init__(self, clip_model): + super().__init__() + self.transformer = clip_model.transformer + self.positional_embedding = clip_model.positional_embedding + self.ln_final = clip_model.ln_final + self.text_projection = clip_model.text_projection + self.dtype = clip_model.dtype + + def forward(self, prompts, tokenized_prompts): + x = prompts + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), tokenized_prompts.argmax(dim=-1)] @ self.text_projection + + return x + + +class PromptLearner(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + n_cls = len(classnames) + n_ctx = cfg.TRAINER.COOP.N_CTX + ctx_init = cfg.TRAINER.COOP.CTX_INIT + dtype = clip_model.dtype + 
ctx_dim = clip_model.ln_final.weight.shape[0] + clip_imsize = clip_model.visual.input_resolution + cfg_imsize = cfg.INPUT.SIZE[0] + assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + + if ctx_init: + # use given words to initialize context vectors + temp = 'a photo of a' + ctx_init = temp.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + + ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] + prompt_prefix = ctx_init + + else: + # random initialization + if cfg.TRAINER.COOP.CSC: + print("Initializing class-specific contexts") + ctx_vectors = torch.empty(n_cls, n_ctx, ctx_dim, dtype=dtype) + else: + print("Initializing a generic context") + ctx_vectors = torch.empty(n_ctx, ctx_dim, dtype=dtype) + nn.init.normal_(ctx_vectors, std=0.02) + prompt_prefix = " ".join(["X"] * n_ctx) + + + print(f'Initial context: "{prompt_prefix}"') + print(f"Number of context words (tokens): {n_ctx}") + + self.ctx = nn.Parameter(ctx_vectors) # to be optimized + + bias_vectors = torch.empty(1, 512, dtype=dtype) + nn.init.normal_(bias_vectors, std=0.02) + self.bias_vectors = nn.Parameter(bias_vectors) + + classnames = [name.replace("_", " ") for name in classnames] + name_lens = [len(_tokenizer.encode(name)) for name in classnames] + prompts = [prompt_prefix + " " + name + "." for name in classnames] + + #print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model_ = load_clip_to_cpu(cfg) + clip_model_.cuda() + + #prompts_ = [prompt_prefix + " " + name + "." for name in classnames] + temp = CUSTOM_TEMPLATES[cfg.DATASET.NAME] + prompts_ = [temp.format(c.replace("_", " ")) for c in classnames] + print(f"Prompts: {prompts_}") + prompts_ = torch.cat([clip.tokenize(p) for p in prompts_]) + prompts_ = prompts_.cuda() + + with torch.no_grad(): + text_features = clip_model_.encode_text(prompts_) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + self.text_features = text_features + + self.meta_net = nn.Sequential(OrderedDict([ + ("linear1", nn.Linear(512, 512)), + ("relu", nn.ReLU(inplace=True)) + #("linear2", nn.Linear(128, 512)) + ])) + + + if cfg.TRAINER.COCOOP.PREC == "fp16": + self.meta_net.half() + + + tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) + with torch.no_grad(): + embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) + # These token vectors will be saved when in save_model(), + # but they should be ignored in load_model() as we want to use + # those computed using the current class names + self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx :, :]) # CLS, EOS + + + self.n_cls = n_cls + self.n_ctx = n_ctx + self.tokenized_prompts = tokenized_prompts # torch.Tensor + self.name_lens = name_lens + self.class_token_position = cfg.TRAINER.COOP.CLASS_TOKEN_POSITION + + def forward(self): + ctx = self.ctx + + if ctx.dim() == 2: + ctx = ctx.unsqueeze(0).expand(self.n_cls, -1, -1) + + prefix = self.token_prefix + suffix = self.token_suffix + + prompts = torch.cat( + [ + prefix, # (n_cls, 1, dim) + ctx, + suffix, # (n_cls, *, dim) + ], + dim=1, + ) + + return prompts + + +class Adapter(nn.Module): + def __init__(self, c_in, reduction=4): + super(Adapter, self).__init__() + self.fc = nn.Sequential( + nn.Linear(c_in, c_in // reduction, bias=False), + nn.ReLU(inplace=True), + nn.Linear(c_in // 
reduction, c_in, bias=False), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + x = self.fc(x) + return x + +class CustomCLIP(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + self.prompt_learner = PromptLearner(cfg, classnames, clip_model) + self.tokenized_prompts = self.prompt_learner.tokenized_prompts + self.ori_embedding = self.prompt_learner.text_features + self.image_encoder = clip_model.visual + self.text_encoder = TextEncoder(clip_model) + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + self.meta_net = self.prompt_learner.meta_net + self.adapter = Adapter(512, 4).to(clip_model.dtype) + + def forward(self, image): + prompts = self.prompt_learner() + image_features = self.image_encoder(image.type(self.dtype)) + + tokenized_prompts = self.tokenized_prompts + text_features = self.text_encoder(prompts, tokenized_prompts) + text_features_old = self.ori_embedding + + + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + logit_scale = self.logit_scale.exp() + + logits = logit_scale * image_features @ text_features.t() + + cos = torch.nn.CosineSimilarity(dim=1,eps=1e-07) + text_features_old = text_features_old / text_features_old.norm(dim=-1, keepdim=True) + score = cos(text_features,text_features_old) + score = 1.0-torch.mean(score) + + return logits, score + + +@TRAINER_REGISTRY.register() +class KgCoOp(TrainerX): + + def check_cfg(self, cfg): + assert cfg.TRAINER.COOP.PREC in ["fp16", "fp32", "amp"] + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + + if cfg.TRAINER.COOP.PREC == "fp32" or cfg.TRAINER.COOP.PREC == "amp": + # CLIP's default precision is fp16 + clip_model.float() + + print("Building custom CLIP") + self.model = CustomCLIP(cfg, classnames, clip_model) + self.w = cfg.TRAINER.COOP.W + + print("Turning off gradients in both the image and the text encoder") + for name, param in self.model.named_parameters(): + #if "prompt_learner" not in name: # and "adapter" not in name: + if "ctx" not in name: + param.requires_grad_(False) + else: + print(name) + + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model.prompt_learner, cfg.MODEL.INIT_WEIGHTS) + + self.model.to(self.device) + # NOTE: only give prompt_learner to the optimizer + self.optim = build_optimizer(self.model.prompt_learner, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("prompt_learner", self.model.prompt_learner, self.optim, self.sched) + + #self.optim_ = build_optimizer(self.model.adapter, cfg.OPTIM) + #self.sched_ = build_lr_scheduler(self.optim, cfg.OPTIM) + #self.register_model('clip_adapter', self.model.adapter, self.optim_, self.sched_) + + self.scaler = GradScaler() if cfg.TRAINER.COOP.PREC == "amp" else None + + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + device_count = torch.cuda.device_count() + if device_count > 1: + print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!") + self.model = nn.DataParallel(self.model) + + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + prec = self.cfg.TRAINER.COOP.PREC + if prec == "amp": + with autocast(): + output = self.model(image) + loss = F.cross_entropy(output, label) + 
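+                # NOTE: self.model(image) returns a (logits, score) pair, where score
+                # is 1 - mean cosine similarity between the learned and the frozen
+                # hand-crafted text features; the fp32/fp16 branch below unpacks the
+                # pair and optimizes cross_entropy + self.w * score, whereas this amp
+                # branch, as written, passes the forward output to F.cross_entropy
+                # without unpacking it.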
self.optim.zero_grad() + self.scaler.scale(loss).backward() + self.scaler.step(self.optim) + self.scaler.update() + else: + output,score = self.model(image) + loss = F.cross_entropy(output, label)+self.w*score + self.model_backward_and_update(loss) + + loss_summary = { + "loss": loss.item(), + "acc": compute_accuracy(output, label)[0].item(), + } + + if (self.batch_idx + 1) == self.num_batches: + #self.update_lr() + self.sched.step() + #self.sched_.step() + return loss_summary + + def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label + + def model_inference(self, input): + return self.model(input)[0] + + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model is given") + return + + names = self.get_model_names() + print(names) + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + + # Ignore fixed token vectors + if "token_prefix" in state_dict: + del state_dict["token_prefix"] + + if "token_suffix" in state_dict: + del state_dict["token_suffix"] + + if "token_midfix" in state_dict: + del state_dict["token_midfix"] + + print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) + # set strict=False + self._models[name].load_state_dict(state_dict, strict=False) \ No newline at end of file diff --git a/trainers/maple.py b/trainers/maple.py new file mode 100644 index 0000000000000000000000000000000000000000..f2f1a9c501abb824ee03ec7fe17aa3f72fc8d232 --- /dev/null +++ b/trainers/maple.py @@ -0,0 +1,333 @@ +import os.path as osp +from collections import OrderedDict +import math +import copy +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_optimizer, build_lr_scheduler + +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer + +_tokenizer = _Tokenizer() + + +def load_clip_to_cpu(cfg): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + design_details = {"trainer": 'MaPLe', + "vision_depth": 0, + "language_depth": 0, "vision_ctx": 0, + "language_ctx": 0, + "maple_length": cfg.TRAINER.MAPLE.N_CTX} + model = clip.build_model(state_dict or model.state_dict(), design_details) + + return model + + +class TextEncoder(nn.Module): + def __init__(self, clip_model): + super().__init__() + self.transformer = clip_model.transformer + self.positional_embedding = clip_model.positional_embedding + self.ln_final = clip_model.ln_final + self.text_projection = clip_model.text_projection + self.dtype = clip_model.dtype + + def forward(self, prompts, 
tokenized_prompts, compound_prompts_deeper_text): + x = prompts + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + # Pass as the list, as nn.sequential cannot process multiple arguments in the forward pass + combined = [x, compound_prompts_deeper_text, 0] # third argument is the counter which denotes depth of prompt + outputs = self.transformer(combined) + x = outputs[0] # extract the x back from here + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), tokenized_prompts.argmax(dim=-1)] @ self.text_projection + + return x + + +class MultiModalPromptLearner(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + n_cls = len(classnames) + n_ctx = cfg.TRAINER.MAPLE.N_CTX + ctx_init = cfg.TRAINER.MAPLE.CTX_INIT + dtype = clip_model.dtype + ctx_dim = clip_model.ln_final.weight.shape[0] + clip_imsize = clip_model.visual.input_resolution + cfg_imsize = cfg.INPUT.SIZE[0] + # Default is 1, which is compound shallow prompting + assert cfg.TRAINER.MAPLE.PROMPT_DEPTH >= 1, "For MaPLe, PROMPT_DEPTH should be >= 1" + self.compound_prompts_depth = cfg.TRAINER.MAPLE.PROMPT_DEPTH # max=12, but will create 11 such shared prompts + assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + + if ctx_init and (n_ctx) <= 4: + # use given words to initialize context vectors + ctx_init = ctx_init.replace("_", " ") + n_ctx = n_ctx + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1: 1 + n_ctx, :] + prompt_prefix = ctx_init + else: + # random initialization + ctx_vectors = torch.empty(n_ctx, ctx_dim, dtype=dtype) + nn.init.normal_(ctx_vectors, std=0.02) + prompt_prefix = " ".join(["X"] * n_ctx) + print('MaPLe design: Multi-modal Prompt Learning') + print(f'Initial context: "{prompt_prefix}"') + print(f"Number of MaPLe context words (tokens): {n_ctx}") + # These below, related to the shallow prompts + # Linear layer so that the tokens will project to 512 and will be initialized from 768 + self.proj = nn.Linear(ctx_dim, 768) + self.proj.half() + self.ctx = nn.Parameter(ctx_vectors) + # These below parameters related to the shared prompts + # Define the compound prompts for the deeper layers + + # Minimum can be 1, which defaults to shallow MaPLe + # compound prompts + self.compound_prompts_text = nn.ParameterList([nn.Parameter(torch.empty(n_ctx, 512)) + for _ in range(self.compound_prompts_depth - 1)]) + for single_para in self.compound_prompts_text: + nn.init.normal_(single_para, std=0.02) + # Also make corresponding projection layers, for each prompt + single_layer = nn.Linear(ctx_dim, 768) + self.compound_prompt_projections = _get_clones(single_layer, self.compound_prompts_depth - 1) + + classnames = [name.replace("_", " ") for name in classnames] + name_lens = [len(_tokenizer.encode(name)) for name in classnames] + prompts = [prompt_prefix + " " + name + "." 
for name in classnames] + + tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) # (n_cls, n_tkn) + with torch.no_grad(): + embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) + + # These token vectors will be saved when in save_model(), + # but they should be ignored in load_model() as we want to use + # those computed using the current class names + self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx:, :]) # CLS, EOS + + self.n_cls = n_cls + self.n_ctx = n_ctx + self.tokenized_prompts = tokenized_prompts # torch.Tensor + self.name_lens = name_lens + + def construct_prompts(self, ctx, prefix, suffix, label=None): + # dim0 is either batch_size (during training) or n_cls (during testing) + # ctx: context tokens, with shape of (dim0, n_ctx, ctx_dim) + # prefix: the sos token, with shape of (n_cls, 1, ctx_dim) + # suffix: remaining tokens, with shape of (n_cls, *, ctx_dim) + + if label is not None: + prefix = prefix[label] + suffix = suffix[label] + + prompts = torch.cat( + [ + prefix, # (dim0, 1, dim) + ctx, # (dim0, n_ctx, dim) + suffix, # (dim0, *, dim) + ], + dim=1, + ) + + return prompts + + def forward(self): + ctx = self.ctx + + if ctx.dim() == 2: + ctx = ctx.unsqueeze(0).expand(self.n_cls, -1, -1) + + prefix = self.token_prefix + suffix = self.token_suffix + prompts = self.construct_prompts(ctx, prefix, suffix) + + # Before returning, need to transform + # prompts to 768 for the visual side + visual_deep_prompts = [] + for index, layer in enumerate(self.compound_prompt_projections): + visual_deep_prompts.append(layer(self.compound_prompts_text[index])) + # Now the other way around + # We will project the textual prompts from 512 to 768 + return prompts, self.proj(self.ctx), self.compound_prompts_text, visual_deep_prompts # pass here original, as for visual 768 is required + + +class CustomCLIP(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + self.prompt_learner = MultiModalPromptLearner(cfg, classnames, clip_model) + self.tokenized_prompts = self.prompt_learner.tokenized_prompts + self.image_encoder = clip_model.visual + self.text_encoder = TextEncoder(clip_model) + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + + def forward(self, image, label=None): + tokenized_prompts = self.tokenized_prompts + logit_scale = self.logit_scale.exp() + + prompts, shared_ctx, deep_compound_prompts_text, deep_compound_prompts_vision = self.prompt_learner() + text_features = self.text_encoder(prompts, tokenized_prompts, deep_compound_prompts_text) + image_features = self.image_encoder(image.type(self.dtype), shared_ctx, deep_compound_prompts_vision) + + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + logits = logit_scale * image_features @ text_features.t() + + if self.prompt_learner.training: + return F.cross_entropy(logits, label) + + return logits + + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + + +@TRAINER_REGISTRY.register() +class MaPLe(TrainerX): + def check_cfg(self, cfg): + assert cfg.TRAINER.MAPLE.PREC in ["fp16", "fp32", "amp"] + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + + if cfg.TRAINER.MAPLE.PREC == "fp32" or 
cfg.TRAINER.MAPLE.PREC == "amp": + # CLIP's default precision is fp16 + clip_model.float() + + print("Building custom CLIP") + self.model = CustomCLIP(cfg, classnames, clip_model) + + print("Turning off gradients in both the image and the text encoder") + name_to_update = "prompt_learner" + + for name, param in self.model.named_parameters(): + if name_to_update not in name: + # Make sure that VPT prompts are updated + if "VPT" in name: + param.requires_grad_(True) + else: + param.requires_grad_(False) + + # Double check + enabled = set() + for name, param in self.model.named_parameters(): + if param.requires_grad: + enabled.add(name) + print(f"Parameters to be updated: {enabled}") + + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model, cfg.MODEL.INIT_WEIGHTS) + + self.model.to(self.device) + # NOTE: only give prompt_learner to the optimizer + self.optim = build_optimizer(self.model, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("MultiModalPromptLearner", self.model, self.optim, self.sched) + + self.scaler = GradScaler() if cfg.TRAINER.MAPLE.PREC == "amp" else None + + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + device_count = torch.cuda.device_count() + if device_count > 1: + print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!") + self.model = nn.DataParallel(self.model) + + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + + model = self.model + optim = self.optim + scaler = self.scaler + + prec = self.cfg.TRAINER.MAPLE.PREC + if prec == "amp": + with autocast(): + loss = model(image, label) + optim.zero_grad() + scaler.scale(loss).backward() + scaler.step(optim) + scaler.update() + else: + loss = model(image, label) + optim.zero_grad() + loss.backward() + optim.step() + + loss_summary = {"loss": loss.item()} + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model is given") + return + + names = self.get_model_names() + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + + # Ignore fixed token vectors + if "prompt_learner.token_prefix" in state_dict: + del state_dict["prompt_learner.token_prefix"] + + if "prompt_learner.token_suffix" in state_dict: + del state_dict["prompt_learner.token_suffix"] + + print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) + # set strict=False + self._models[name].load_state_dict(state_dict, strict=False) \ No newline at end of file diff --git a/trainers/promptsrc.py b/trainers/promptsrc.py new file mode 100644 index 0000000000000000000000000000000000000000..7a2e176d232e9bdbbd6b7a22cada84322e4c480f --- /dev/null +++ b/trainers/promptsrc.py @@ -0,0 +1,401 @@ +import copy +import os.path as osp +import 
numpy as np +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_optimizer, build_lr_scheduler +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer +from .imagenet_templates import IMAGENET_TEMPLATES + +_tokenizer = _Tokenizer() + + +def load_clip_to_cpu(cfg, zero_shot_model=False): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + if not zero_shot_model: + design_details = {"trainer": 'IVLP', + "vision_depth": cfg.TRAINER.PROMPTSRC.PROMPT_DEPTH_VISION, + "language_depth": cfg.TRAINER.PROMPTSRC.PROMPT_DEPTH_TEXT, + "vision_ctx": cfg.TRAINER.PROMPTSRC.N_CTX_VISION, + "language_ctx": cfg.TRAINER.PROMPTSRC.N_CTX_TEXT} + model = clip.build_model(state_dict or model.state_dict(), design_details) + else: + # Return original CLIP model for generating frozen VL features + design_details = {"trainer": 'IVLP', + "vision_depth": 0, + "language_depth": 0, "vision_ctx": 0, + "language_ctx": 0} + model = clip.build_model(state_dict or model.state_dict(), design_details) + return model + return model + + +class TextEncoder(nn.Module): + def __init__(self, clip_model): + super().__init__() + self.transformer = clip_model.transformer + self.positional_embedding = clip_model.positional_embedding + self.ln_final = clip_model.ln_final + self.text_projection = clip_model.text_projection + self.dtype = clip_model.dtype + + def forward(self, prompts, tokenized_prompts): + x = prompts + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), tokenized_prompts.argmax(dim=-1)] @ self.text_projection + + return x + + +class VLPromptLearner(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + n_cls = len(classnames) + # Make sure Language depth >= 1 + assert cfg.TRAINER.PROMPTSRC.PROMPT_DEPTH_TEXT >= 1, "In Independent VL prompting, Language prompt depth should be >=1" \ + "\nPlease use VPT trainer if you want to learn only vision " \ + "branch" + n_ctx = cfg.TRAINER.PROMPTSRC.N_CTX_TEXT + ctx_init = cfg.TRAINER.PROMPTSRC.CTX_INIT + dtype = clip_model.dtype + ctx_dim = clip_model.ln_final.weight.shape[0] + clip_imsize = clip_model.visual.input_resolution + cfg_imsize = cfg.INPUT.SIZE[0] + assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + + if ctx_init and n_ctx <= 4: + # use given words to initialize context vectors + ctx_init = ctx_init.replace("_", " ") + n_ctx = n_ctx + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1: 1 + n_ctx, :] + prompt_prefix = ctx_init + else: + # random initialization + ctx_vectors = torch.empty(n_ctx, ctx_dim, dtype=dtype) + nn.init.normal_(ctx_vectors, std=0.02) + prompt_prefix = " 
".join(["X"] * n_ctx) + print(f"Independent V-L design") + print(f'Initial text context: "{prompt_prefix}"') + print(f"Number of context words (tokens) for Language prompting: {n_ctx}") + print(f"Number of context words (tokens) for Vision prompting: {cfg.TRAINER.PROMPTSRC.N_CTX_VISION}") + self.ctx = nn.Parameter(ctx_vectors) + + classnames = [name.replace("_", " ") for name in classnames] + name_lens = [len(_tokenizer.encode(name)) for name in classnames] + prompts = [prompt_prefix + " " + name + "." for name in classnames] + + tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) # (n_cls, n_tkn) + # Also create frozen CLIP + clip_model_temp = load_clip_to_cpu(cfg, True).float().cuda() + clip_model_temp_image = load_clip_to_cpu(cfg, True) + with torch.no_grad(): + embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) + self.ZS_image_encoder = clip_model_temp_image.visual + # Now pre-compute the frozen VL embeddings + all_teacher_features = [] + # Using multiple text templates to ensure textual diversity during training + for single_template in IMAGENET_TEMPLATES: + x = [single_template.replace("{}", name) for name in classnames] + x_tokenized = torch.cat([clip.tokenize(p) for p in x]) + text_features = clip_model_temp.encode_text(x_tokenized.cuda()) + all_teacher_features.append(text_features.unsqueeze(1)) + + self.fixed_embeddings = torch.cat(all_teacher_features, dim=1).mean(dim=1) + # These token vectors will be saved when in save_model(), + # but they should be ignored in load_model() as we want to use + # those computed using the current class names + self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx:, :]) # CLS, EOS + + self.n_cls = n_cls + self.n_ctx = n_ctx + self.tokenized_prompts = tokenized_prompts # torch.Tensor + self.name_lens = name_lens + + def construct_prompts(self, ctx, prefix, suffix, label=None): + # dim0 is either batch_size (during training) or n_cls (during testing) + # ctx: context tokens, with shape of (dim0, n_ctx, ctx_dim) + # prefix: the sos token, with shape of (n_cls, 1, ctx_dim) + # suffix: remaining tokens, with shape of (n_cls, *, ctx_dim) + + if label is not None: + prefix = prefix[label] + suffix = suffix[label] + + prompts = torch.cat( + [ + prefix, # (dim0, 1, dim) + ctx, # (dim0, n_ctx, dim) + suffix, # (dim0, *, dim) + ], + dim=1, + ) + + return prompts + + def forward(self): + ctx = self.ctx + if ctx.dim() == 2: + ctx = ctx.unsqueeze(0).expand(self.n_cls, -1, -1) + + prefix = self.token_prefix + suffix = self.token_suffix + prompts = self.construct_prompts(ctx, prefix, suffix) + + return prompts + + +class CustomCLIP(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + self.prompt_learner = VLPromptLearner(cfg, classnames, clip_model) + self.tokenized_prompts = self.prompt_learner.tokenized_prompts + self.image_encoder = clip_model.visual + self.text_encoder = TextEncoder(clip_model) + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + self.total_epochs = cfg.OPTIM.MAX_EPOCH + self.n_cls = len(classnames) + + def forward(self, image, label=None): + tokenized_prompts = self.tokenized_prompts + logit_scale = self.logit_scale.exp() + + prompts = self.prompt_learner() + # Compute the prompted image and text features + text_features = self.text_encoder(prompts, tokenized_prompts) + image_features = self.image_encoder(image.type(self.dtype)) + image_features = image_features / 
image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + # Compute the prompted logits + logits = logit_scale * image_features @ text_features.t() + if self.prompt_learner.training: + # Now calculate the frozen pre-trained features + fixed_embeddings = self.prompt_learner.fixed_embeddings # precomputed pre-trained frozen textual features + fixed_embeddings = fixed_embeddings / fixed_embeddings.norm(dim=-1, keepdim=True) + with torch.no_grad(): + zero_shot_features = self.prompt_learner.ZS_image_encoder(image.type(self.dtype)) + zero_shot_features = zero_shot_features / zero_shot_features.norm(dim=-1, keepdim=True) + # Compute pre-trained frozen visual features + zero_shot_logits = logit_scale * zero_shot_features.cuda() @ fixed_embeddings.half().cuda().t() + + return F.cross_entropy(logits, + label), text_features, fixed_embeddings, zero_shot_features, \ + image_features, zero_shot_logits, logits + else: + return logits + + +@TRAINER_REGISTRY.register() +class PromptSRC(TrainerX): + def check_cfg(self, cfg): + assert cfg.TRAINER.PROMPTSRC.PREC in ["fp16", "fp32", "amp"] + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + + if cfg.TRAINER.PROMPTSRC.PREC == "fp32" or cfg.TRAINER.PROMPTSRC.PREC == "amp": + # CLIP's default precision is fp16 + clip_model.float() + + print("Building custom CLIP") + self.model = CustomCLIP(cfg, classnames, clip_model) + + print("Turning off gradients in both the image and the text encoder") + name_to_update = "prompt_learner" + + for name, param in self.model.named_parameters(): + if name_to_update not in name: + # Make sure that VPT prompts are updated + if "VPT" in name: + param.requires_grad_(True) + else: + param.requires_grad_(False) + else: + if "ZS_image_encoder" in name: + param.requires_grad_(False) + + # Double check + enabled = set() + for name, param in self.model.named_parameters(): + if param.requires_grad: + enabled.add(name) + print(f"Parameters to be updated: {enabled}") + print(f"Parameters count: {len(enabled)}") + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model, cfg.MODEL.INIT_WEIGHTS) + + self.model.to(self.device) + # NOTE: only give prompt_learner to the optimizer + self.optim = build_optimizer(self.model, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("VLPromptLearner", self.model, self.optim, self.sched) + # Cosine scheduler + self.total_epochs = cfg.OPTIM.MAX_EPOCH + self.step_counter = 1 + N = cfg.OPTIM.MAX_EPOCH + mean = cfg.TRAINER.PROMPTSRC.GPA_MEAN + stdev = cfg.TRAINER.PROMPTSRC.GPA_STD + gauss = self.get_gauss(mean, stdev) + self.gauss = np.array([gauss(a) for a in range(1, N + 1)]) + self.gauss = self.gauss / sum(self.gauss) + self.scaler = GradScaler() if cfg.TRAINER.PROMPTSRC.PREC == "amp" else None + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + device_count = torch.cuda.device_count() + if device_count > 1: + print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!") + self.model = nn.DataParallel(self.model) + # Keep model with GPA + self.previous_model_gpa = None + + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + + model = self.model + optim = self.optim + scaler = self.scaler + + prec = self.cfg.TRAINER.PROMPTSRC.PREC + if prec == 
"amp": + with autocast(): + loss = model(image, label) + optim.zero_grad() + scaler.scale(loss).backward() + scaler.step(optim) + scaler.update() + else: + loss_ce, normalized_text_features, zs_clip_text_embeddings, zs_image_embedd, image_ft, \ + zero_shot_logits, logits = model(image, label) + # Calculate the L_SCL_text loss + loss_scl_text = F.l1_loss(normalized_text_features, zs_clip_text_embeddings.cuda(), + reduction='mean') * self.cfg.TRAINER.PROMPTSRC.TEXT_LOSS_WEIGHT + # Calculate the L_SCL_image loss + loss_scl_image = F.l1_loss(image_ft, zs_image_embedd.cuda(), + reduction='mean') * self.cfg.TRAINER.PROMPTSRC.IMAGE_LOSS_WEIGHT + # Now calculate L_SCL_logits + L_SCL_logits = F.kl_div( + F.log_softmax(logits / 1, dim=1), + F.log_softmax(zero_shot_logits / 1, dim=1), + reduction='sum', + log_target=True + ) * (1 * 1) / logits.numel() + L_SCL = (L_SCL_logits + loss_scl_text + loss_scl_image) + loss = (loss_ce + L_SCL) + optim.zero_grad() + loss.backward() + optim.step() + + loss_summary = {"loss": loss.item()} + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + # Means one epoch is completed, perform GPA + self.step_counter = self.step_counter + 1 + current_epoch_weight = self.gauss[self.step_counter - 2] + current_model_weights = copy.deepcopy(model.state_dict()) + weighted_state_dict = self.state_dict_weighting(current_model_weights, current_epoch_weight) + if self.previous_model_gpa is None: + self.previous_model_gpa = weighted_state_dict + else: + self.previous_model_gpa = self.state_dict_add(weighted_state_dict, self.previous_model_gpa) + + if self.step_counter == self.model.total_epochs + 1: + print("Using GPA model for final inference...") + model.load_state_dict(self.previous_model_gpa) + self.model.load_state_dict(self.previous_model_gpa) + return loss_summary + + def state_dict_weighting(self, main_dict, weightage, prompt_only=False): + # Average all parameters + updated_dict = copy.deepcopy(main_dict) + if not prompt_only: + for key in main_dict: + updated_dict[key] = main_dict[key] * weightage + return updated_dict + else: + return main_dict * weightage + + def state_dict_add(self, dict1, dict2, prompt_only=False): + # Average all parameters + if not prompt_only: + modified_dict = dict2 + for key in dict1: + modified_dict[key] = (modified_dict[key] + dict1[key]) + return modified_dict + else: + return dict1 + dict2 + + def get_gauss(self, mu, sigma): + gauss = lambda x: (1 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mu) / sigma) ** 2) + return gauss + + def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model is given") + return + + names = self.get_model_names() + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + + # Ignore fixed token vectors + if "prompt_learner.token_prefix" in state_dict: + del state_dict["prompt_learner.token_prefix"] + + if "prompt_learner.token_suffix" in state_dict: + del 
state_dict["prompt_learner.token_suffix"] + + print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) + # set strict=False + self._models[name].load_state_dict(state_dict, strict=False) \ No newline at end of file diff --git a/trainers/supr.py b/trainers/supr.py new file mode 100644 index 0000000000000000000000000000000000000000..95292572747f803a79b5957e4fd49b632acc5a68 --- /dev/null +++ b/trainers/supr.py @@ -0,0 +1,455 @@ +import os.path as osp +import os + + +import numpy as np +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast + +from torch import linalg as LA +import random +from tqdm import tqdm +import yaml +import copy + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_optimizer, build_lr_scheduler + +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer +from clip.model import ResidualAttentionBlock_SuPr + +_tokenizer = _Tokenizer() + + +def load_clip_to_cpu(cfg, zero_shot_model=False, max_name_len=6): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + if not zero_shot_model: + design_details = {"trainer": cfg.TRAINER.SUPR.TRAINER_BACKBONE, + "vision_depth": cfg.TRAINER.SUPR.PROMPT_DEPTH_VISION, + "language_depth": cfg.TRAINER.SUPR.PROMPT_DEPTH_TEXT, + "vision_ctx": cfg.TRAINER.SUPR.N_CTX_VISION, + "language_ctx": cfg.TRAINER.SUPR.N_CTX_TEXT, + "space_dim": cfg.TRAINER.SUPR.SPACE_DIM, + "max_name_len": max_name_len} + else: + design_details = {"trainer": 'IVLP', + "vision_depth": 0, + "language_depth": 0, + "vision_ctx": 0, + "language_ctx": 0} + model = clip.build_model(state_dict or model.state_dict(), design_details) + + return model + + +class TextEncoder(nn.Module): + def __init__(self, clip_model): + super().__init__() + self.transformer = clip_model.transformer + self.positional_embedding = clip_model.positional_embedding + self.ln_final = clip_model.ln_final + self.text_projection = clip_model.text_projection + self.dtype = clip_model.dtype + + + def forward(self, prompts, tokenized_prompts): + #(n_cls, space_dim+1, n_ctx, dim) + x = prompts + self.positional_embedding.type(self.dtype) + n_cls, s, n_ctx, dim, = x.size() + + x = self.transformer(x) + x = self.ln_final(x).type(self.dtype) + # take features from the eot embedding (eot_token is the highest number in each sequence) + eot = tokenized_prompts.argmax(dim=-1).view(n_cls, 1, 1, 1).expand(n_cls, s, 1, dim).to(x.device) + + x = torch.gather(x, dim=2, index=eot) @ self.text_projection + + return x.squeeze(2) + + + +class SubspacePromptLearner(nn.Module): + def __init__(self, cfg, classnames, clip_model, templates): + super().__init__() + n_cls = len(classnames) + n_ctx = cfg.TRAINER.SUPR.N_CTX_TEXT + space_dim = cfg.TRAINER.SUPR.SPACE_DIM + ctx_init = cfg.TRAINER.SUPR.CTX_INIT + dtype = clip_model.dtype + ctx_dim = clip_model.ln_final.weight.shape[0] + clip_imsize = clip_model.visual.input_resolution + cfg_imsize = cfg.INPUT.SIZE[0] + assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + assert cfg.TRAINER.SUPR.PROMPT_DEPTH_TEXT 
>= 0, "For SuPr, PROMPT_DEPTH should be >= 1, 1 is shallow prompting" + self.text_prompts_depth = cfg.TRAINER.SUPR.PROMPT_DEPTH_TEXT # max=12, but will create 11 such shared prompts + self.vision_prompts_depth = cfg.TRAINER.SUPR.PROMPT_DEPTH_VISION # max=12, but will create 11 such shared prompts + + + if ctx_init: + # use given words to initialize context vectors + ctx_init = ctx_init.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] + self.ctx = nn.Parameter(ctx_vectors) + prompt_prefix = ctx_init + + with open(cfg.TRAINER.SUPR.HARD_PROMPT_PATH + 'init.yaml', 'r') as file: + space_init = yaml.load(file, Loader=yaml.FullLoader) + self.ctx_space = nn.ParameterList([]) + for i in range(space_dim): + ctx_init = space_init[i] + ctx_init = ctx_init.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] + self.ctx_space.append(nn.Parameter(ctx_vectors)) + + + else: + # random initialization + self.ctx = nn.Parameter(torch.empty(n_ctx, ctx_dim, dtype=dtype)) + nn.init.normal_(self.ctx, std=0.02) + prompt_prefix = " ".join(["X"] * n_ctx) + + self.ctx_space = nn.ParameterList([nn.Parameter(torch.empty(n_ctx, ctx_dim, dtype=dtype)) + for _ in range(space_dim)]) + for single_para in self.ctx_space: + nn.init.normal_(single_para, std=0.02) + + print(f'Initial context: "{prompt_prefix}"') + print(f"Number of context words (tokens): {n_ctx}") + + classnames = [name.replace("_", " ") for name in classnames] + name_lens = [len(_tokenizer.encode(name)) for name in classnames] + prompts = [prompt_prefix + " " + name + "." 
for name in classnames] + + + tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) + with torch.no_grad(): + embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) + + self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx :, :]) # CLS, EOS + + hard_prompt_feature = [] + + clip_model_temp = load_clip_to_cpu(cfg, True).float().cuda() + for temp in templates: + prompts = [temp.format(c.replace("_", " ")) for c in classnames] + prompts = torch.cat([clip.tokenize(p) for p in prompts]) + prompts = prompts.to(torch.device("cuda")) + + with torch.no_grad(): + text_features = clip_model_temp.encode_text(prompts) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + hard_prompt_feature.append(text_features.clone().detach()) + + self.n_cls = n_cls + self.n_ctx = n_ctx + self.space_dim = space_dim + self.tokenized_prompts = tokenized_prompts + + self.name_lens = name_lens + self.hard_prompt_feature = torch.stack(hard_prompt_feature) + + + + + def construct_prompts(self, ctx, prefix, suffix, label=None): + # dim0 is either batch_size (during training) or n_cls (during testing) + # ctx: context tokens, with shape of (dim0, space_dim, n_ctx, ctx_dim) + # prefix: the sos token, with shape of (n_cls, space_dim, 1, ctx_dim) + # suffix: remaining tokens, with shape of (n_cls, space_dim, *, ctx_dim) + + if label is not None: + prefix = prefix[label] + suffix = suffix[label] + + + all_ctx = [ctx] + for i in range(self.space_dim): + all_ctx.append(self.ctx_space[i].unsqueeze(0).expand(self.n_cls, -1, -1)) + ctx = torch.stack(all_ctx, dim=1) + + + prompts = torch.cat( + [ + prefix, # (n_cls, space_dim+1, 1, dim) + ctx, # (n_cls, space_dim+1, n_ctx, dim) + suffix, # (n_cls, space_dim+1, *, dim) + ], + dim=2, + ) + + return prompts + + def forward(self): + ctx = self.ctx.unsqueeze(0).expand(self.n_cls, -1, -1) + prefix = self.token_prefix.unsqueeze(1).expand(-1, self.space_dim + 1, -1, -1) + suffix = self.token_suffix.unsqueeze(1).expand(-1, self.space_dim + 1, -1, -1) + + prompts = self.construct_prompts(ctx, prefix, suffix) + + return prompts + + +class CustomCLIP(nn.Module): + def __init__(self, cfg, classnames, clip_model, templates): + super().__init__() + + self.prompt_learner = SubspacePromptLearner(cfg, classnames, clip_model, templates) + self.tokenized_prompts = self.prompt_learner.tokenized_prompts + self.image_encoder = clip_model.visual + self.text_encoder = TextEncoder(clip_model) + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + + self.space_dim = cfg.TRAINER.SUPR.SPACE_DIM + self.use_svd = cfg.TRAINER.SUPR.SVD + self.ce_weight = cfg.TRAINER.SUPR.LAMBDA # balance coeficient for two logits, gamma in the paper + + def forward(self, image, label=None): + tokenized_prompts = self.tokenized_prompts + logit_scale = self.logit_scale.exp() + + prompts = self.prompt_learner() #(n_cls * space_dim+1, n_ctx, dim) + text_features = self.text_encoder(prompts, tokenized_prompts)#(n_cls, n_ctx, dim) + image_features = self.image_encoder(image.type(self.dtype)) + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + text_feature_point = text_features[:,0,:] + text_features = text_features[:,1:,:] + projected_image_feature = self.project_space(image_features.unsqueeze(1).expand(-1, self.prompt_learner.n_cls, -1),text_features) # n_query n_classes n_dim + + cos_sim = 
torch.nn.CosineSimilarity(dim=2,eps=1e-07) + logits = logit_scale * cos_sim(image_features.unsqueeze(1).float(),projected_image_feature) + logits_point = logit_scale * image_features @ text_feature_point.t() + + if self.prompt_learner.training: + hard_prompt_feature = self.prompt_learner.hard_prompt_feature # template, n_cls, dim + projected_hardtext_feature = self.project_space(hard_prompt_feature, text_features) + + return logits, F.cross_entropy(logits, label), \ + F.cross_entropy(logits_point, label), \ + F.cosine_embedding_loss(hard_prompt_feature.flatten(0,1), projected_hardtext_feature.flatten(0,1), + torch.ones(hard_prompt_feature.flatten(0,1).size(0)).to(label.device), margin=0.0) + + else: + return self.ce_weight * logits + (1 - self.ce_weight) * logits_point + + def project_space(self, z_query, z_support): + # Work on support vectors + # the shape of z_support is [n_classes, n_support, n_dim] + #come half, trans float() for inverse, + z_support = z_support.float() + z_query = z_query.float() + + # use svd or not to calculate the projection + if self.use_svd: + + z_support = z_support.permute(0,2,1) #n_classes n_dim n_support + + try:# avoid dependency between support vectors + u, s, v = torch.linalg.svd(z_support, full_matrices=False) + except: + u, s, v = torch.linalg.svd(z_support + 1e-4 * torch.randn_like(z_support),full_matrices=False) + z_support = u + # Work on query vectors + # N_0 maybe the number of images or the number of hard prompts embedding + # z_query [N_0 n_classes n_dim] + # n_classes, n_support, n_dim * n_classes, n_dim, N_0 = n_classes, n_support, N_0 + self.beta_hat = torch.matmul(z_support.transpose(1,2), z_query.permute(1,2,0)) + z_lrc = torch.matmul(z_support,self.beta_hat) + return z_lrc.permute(2,0,1) + + else: #use least square to calculate the projection + try:# avoid dependency between support vectors + z_supports_inv = torch.matmul(torch.linalg.inv( + torch.matmul(z_support, z_support.transpose(1, 2))), z_support)# n_classes, n_support, n_dim + except: + z_supports_inv = torch.matmul(torch.linalg.inv( + torch.matmul(z_support, z_support.transpose(1, 2)) + 1e-4 * torch.eye( #n_classes, n_support, n_support + z_support.shape[1],).cuda().repeat(z_support.shape[0], 1, 1)), z_support)# n_classes, n_support, n_dim + + beta_hat = torch.matmul(z_supports_inv, z_query.permute(1, 2, 0)) # [n_classes, n_support, n_dim] * [n_classes, n_dim, N_0] = [n_classes, n_support, N_0] + z_lrc = torch.matmul(z_support.transpose(1, 2), beta_hat) # [n_classes, n_dim, n_support] * [n_classes, n_support, N_0] = n_classes, n_dim, T + + return z_lrc.permute(2,0,1) + + + +@TRAINER_REGISTRY.register() +class SuPr(TrainerX): + """Supspace Prompting. + """ + def check_cfg(self, cfg): + assert cfg.TRAINER.SUPR.PREC in ["fp16", "fp32", "amp"] + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + #calculate the max length of class names for concatenation + all_classnames = [name.replace("_", " ") for name in self.dm.dataset.all_classnames] + max_name_len = max([len(_tokenizer.encode(name)) for name in all_classnames]) + 2 #'. 
EOS' + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg,False, max_name_len=max_name_len) + + if cfg.TRAINER.PROMPTSRC.PREC == "fp32" or cfg.TRAINER.PROMPTSRC.PREC == "amp": + # CLIP's default precision is fp16 + clip_model.float() + + # load the hard prompts templates + with open(cfg.TRAINER.SUPR.HARD_PROMPT_PATH + 'genertic_templates.yaml', 'r') as file: + genertic_hard_prompt = yaml.load(file, Loader=yaml.FullLoader) + templates = genertic_hard_prompt # + + print("Building custom CLIP") + self.model = CustomCLIP(cfg, classnames, clip_model, templates) + + print("Turning off gradients in both the image and the text encoder") + name_to_update = "prompt_learner" + + for name, param in self.model.named_parameters(): + if name_to_update not in name: + # Make sure that VPT prompts are updated + if "VPT" in name: + param.requires_grad_(True) + else: + param.requires_grad_(False) + + # Double check + enabled = set() + for name, param in self.model.named_parameters(): + if param.requires_grad: + enabled.add(name) + print(f"Parameters to be updated: {enabled}") + + + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model, cfg.MODEL.INIT_WEIGHTS) + + + #### + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + # However, IMAGENET need multiple GPU for 1000 class text prompt (3090TI) + self.device = torch.device("cuda:0") + self.device1 = torch.device("cuda") + self.model.to(self.device) + device_count = torch.cuda.device_count() + if device_count > 1: + self.model.text_encoder = nn.DataParallel(self.model.text_encoder.to(self.device1)) + + + + # NOTE: only give prompt_learner to the optimizer + + self.optim = build_optimizer(self.model, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("SubspacePromptLearner", self.model, self.optim, self.sched) + + self.scaler = GradScaler() if cfg.TRAINER.SUPR.PREC == "amp" else None + + + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + + model = self.model + optim = self.optim + scaler = self.scaler + + prec = self.cfg.TRAINER.SUPR.PREC + if prec == "amp": + with autocast(): + output, loss_ce, loss_ce_point, loss_hard_reg = model(image, label) + loss = self.cfg.TRAINER.SUPR.LAMBDA * loss_ce + (1 - self.cfg.TRAINER.SUPR.LAMBDA) * loss_ce_point \ + + loss_hard_reg * self.cfg.TRAINER.SUPR.REG_LOSS_WEIGHT + optim.zero_grad() + scaler.scale(loss).backward() + scaler.unscale_(optim) + # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0) + scaler.step(optim) + scaler.update() + else: + output, loss_ce, loss_ce_point, loss_hard_reg = model(image, label) + loss = self.cfg.TRAINER.SUPR.LAMBDA * loss_ce + (1 - self.cfg.TRAINER.SUPR.LAMBDA) * loss_ce_point \ + + loss_hard_reg * self.cfg.TRAINER.SUPR.REG_LOSS_WEIGHT + self.model_backward_and_update(loss) + + loss_summary = { + "loss": loss.item(), + "acc": compute_accuracy(output, label)[0].item(), + "loss_ce": loss_ce.item(), + "loss_ce_point": loss_ce_point.item(), + "loss_hard_reg": loss_hard_reg.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model 
is given") + return + + names = self.get_model_names() + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + + # Ignore fixed token vectors + if "prompt_learner.token_prefix" in state_dict: + del state_dict["prompt_learner.token_prefix"] + + if "prompt_learner.token_suffix" in state_dict: + del state_dict["prompt_learner.token_suffix"] + + + print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) + self._models[name].load_state_dict(state_dict, strict=False) \ No newline at end of file diff --git a/trainers/supr_ens.py b/trainers/supr_ens.py new file mode 100644 index 0000000000000000000000000000000000000000..dab3c4a7d03ff93b82759c24019dbfe353405205 --- /dev/null +++ b/trainers/supr_ens.py @@ -0,0 +1,501 @@ +import collections +import os.path as osp +import os + + +import numpy as np +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast + +from torch import linalg as LA +import random +from tqdm import tqdm +import yaml +import copy + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_optimizer, build_lr_scheduler + +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer +from clip.model import ResidualAttentionBlock_SuPr + +_tokenizer = _Tokenizer() + + +def load_clip_to_cpu(cfg, zero_shot_model=False, max_name_len=6): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + if not zero_shot_model: + design_details = {"trainer": cfg.TRAINER.SUPR.TRAINER_BACKBONE, + "vision_depth": cfg.TRAINER.SUPR.PROMPT_DEPTH_VISION, + "language_depth": cfg.TRAINER.SUPR.PROMPT_DEPTH_TEXT, + "vision_ctx": cfg.TRAINER.SUPR.N_CTX_VISION, + "language_ctx": cfg.TRAINER.SUPR.N_CTX_TEXT, + "space_dim": cfg.TRAINER.SUPR.SPACE_DIM, + "max_name_len": max_name_len} + else: + design_details = {"trainer": 'IVLP', + "vision_depth": 0, + "language_depth": 0, + "vision_ctx": 0, + "language_ctx": 0} + model = clip.build_model(state_dict or model.state_dict(), design_details) + + return model + + +class TextEncoder(nn.Module): + def __init__(self, clip_model): + super().__init__() + self.transformer = clip_model.transformer + self.positional_embedding = clip_model.positional_embedding + self.ln_final = clip_model.ln_final + self.text_projection = clip_model.text_projection + self.dtype = clip_model.dtype + + + def forward(self, prompts, tokenized_prompts): + #(n_cls, space_dim+1, n_ctx, dim) + x = prompts + self.positional_embedding.type(self.dtype) + n_cls, s, n_ctx, dim, = x.size() + + x = self.transformer(x) + x = self.ln_final(x).type(self.dtype) + # take features from the eot embedding (eot_token is the highest number in each sequence) + eot = tokenized_prompts.argmax(dim=-1).view(n_cls, 1, 1, 
1).expand(n_cls, s, 1, dim).to(x.device) + + x = torch.gather(x, dim=2, index=eot) @ self.text_projection + + return x.squeeze(2) + + + +class SubspacePromptLearner(nn.Module): + def __init__(self, cfg, classnames, clip_model, templates): + super().__init__() + n_cls = len(classnames) + n_ctx = cfg.TRAINER.SUPR.N_CTX_TEXT + space_dim = cfg.TRAINER.SUPR.SPACE_DIM + ctx_init = cfg.TRAINER.SUPR.CTX_INIT + dtype = clip_model.dtype + ctx_dim = clip_model.ln_final.weight.shape[0] + clip_imsize = clip_model.visual.input_resolution + cfg_imsize = cfg.INPUT.SIZE[0] + assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + assert cfg.TRAINER.SUPR.PROMPT_DEPTH_TEXT >= 0, "For SuPr, PROMPT_DEPTH should be >= 1, 1 is shallow prompting" + self.text_prompts_depth = cfg.TRAINER.SUPR.PROMPT_DEPTH_TEXT # max=12, but will create 11 such shared prompts + self.vision_prompts_depth = cfg.TRAINER.SUPR.PROMPT_DEPTH_VISION # max=12, but will create 11 such shared prompts + + + if ctx_init: + # use given words to initialize context vectors + ctx_init = ctx_init.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] + self.ctx = nn.Parameter(ctx_vectors) + prompt_prefix = ctx_init + + with open(cfg.TRAINER.SUPR.HARD_PROMPT_PATH + 'init.yaml', 'r') as file: + space_init = yaml.load(file, Loader=yaml.FullLoader) + self.ctx_space = nn.ParameterList([]) #self.ctx_space + for i in range(space_dim): + ctx_init = space_init[i] + ctx_init = ctx_init.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] + self.ctx_space.append(nn.Parameter(ctx_vectors)) + + + else: + # random initialization + self.ctx = nn.Parameter(torch.empty(n_ctx, ctx_dim, dtype=dtype)) + nn.init.normal_(self.ctx, std=0.02) + prompt_prefix = " ".join(["X"] * n_ctx) + self.ctx_space = nn.ParameterList([nn.Parameter(torch.empty(n_ctx, ctx_dim, dtype=dtype)) + for _ in range(space_dim)]) #ctx_space + for single_para in self.ctx_space: + nn.init.normal_(single_para, std=0.02) + + print(f'Initial context: "{prompt_prefix}"') + print(f"Number of context words (tokens): {n_ctx}") + + classnames = [name.replace("_", " ") for name in classnames] + name_lens = [len(_tokenizer.encode(name)) for name in classnames] + prompts = [prompt_prefix + " " + name + "." 
for name in classnames] + + + tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) + with torch.no_grad(): + embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) + + self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx :, :]) # CLS, EOS + + hard_prompt_feature = [] + + clip_model_temp = load_clip_to_cpu(cfg, True).float().cuda() + for temp in templates: + prompts = [temp.format(c.replace("_", " ")) for c in classnames] + prompts = torch.cat([clip.tokenize(p) for p in prompts]) + prompts = prompts.to(torch.device("cuda")) + + with torch.no_grad(): + text_features = clip_model_temp.encode_text(prompts) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + hard_prompt_feature.append(text_features.clone().detach()) + + self.n_cls = n_cls + self.n_ctx = n_ctx + self.space_dim = space_dim + self.tokenized_prompts = tokenized_prompts + + self.name_lens = name_lens + self.hard_prompt_feature = torch.stack(hard_prompt_feature) + + + + + def construct_prompts(self, ctx, prefix, suffix, label=None): + # dim0 is either batch_size (during training) or n_cls (during testing) + # ctx: context tokens, with shape of (dim0, space_dim, n_ctx, ctx_dim) + # prefix: the sos token, with shape of (n_cls, space_dim, 1, ctx_dim) + # suffix: remaining tokens, with shape of (n_cls, space_dim, *, ctx_dim) + + if label is not None: + prefix = prefix[label] + suffix = suffix[label] + + + all_ctx = [ctx] + for i in range(self.space_dim): + all_ctx.append(self.ctx_space[i].unsqueeze(0).expand(self.n_cls, -1, -1))#ctx_space + ctx = torch.stack(all_ctx, dim=1) + + + prompts = torch.cat( + [ + prefix, # (n_cls, space_dim+1, 1, dim) + ctx, # (n_cls, space_dim+1, n_ctx, dim) + suffix, # (n_cls, space_dim+1, *, dim) + ], + dim=2, + ) + + return prompts + + def forward(self): + ctx = self.ctx.unsqueeze(0).expand(self.n_cls, -1, -1) + prefix = self.token_prefix.unsqueeze(1).expand(-1, self.space_dim + 1, -1, -1) + suffix = self.token_suffix.unsqueeze(1).expand(-1, self.space_dim + 1, -1, -1) + + prompts = self.construct_prompts(ctx, prefix, suffix) + + return prompts + + +class CustomCLIP(nn.Module): + def __init__(self, cfg, classnames, clip_model, templates): + super().__init__() + + self.prompt_learner = SubspacePromptLearner(cfg, classnames, clip_model, templates) + self.tokenized_prompts = self.prompt_learner.tokenized_prompts + self.image_encoder = clip_model.visual + self.text_encoder = TextEncoder(clip_model) + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + + self.space_dim = cfg.TRAINER.SUPR.SPACE_DIM + self.use_svd = cfg.TRAINER.SUPR.SVD + self.ce_weight = cfg.TRAINER.SUPR.LAMBDA # balance coeficient for two logits, gamma in the paper + + def forward(self, image, label=None): + tokenized_prompts = self.tokenized_prompts + logit_scale = self.logit_scale.exp() + + prompts = self.prompt_learner() #(n_cls * space_dim+1, n_ctx, dim) + text_features = self.text_encoder(prompts, tokenized_prompts)#(n_cls, n_ctx, dim) + image_features = self.image_encoder(image.type(self.dtype)) + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + text_feature_point = text_features[:,0,:] + text_features = text_features[:,1:,:] + projected_image_feature = self.project_space(image_features.unsqueeze(1).expand(-1, self.prompt_learner.n_cls, -1),text_features) # n_query n_classes n_dim + + 
cos_sim = torch.nn.CosineSimilarity(dim=2,eps=1e-07) + logits = logit_scale * cos_sim(image_features.unsqueeze(1).float(),projected_image_feature) + logits_point = logit_scale * image_features @ text_feature_point.t() + + if self.prompt_learner.training: + hard_prompt_feature = self.prompt_learner.hard_prompt_feature # template, n_cls, dim + projected_hardtext_feature = self.project_space(hard_prompt_feature, text_features) + + return logits, F.cross_entropy(logits, label), \ + F.cross_entropy(logits_point, label), \ + F.cosine_embedding_loss(hard_prompt_feature.flatten(0,1), projected_hardtext_feature.flatten(0,1), + torch.ones(hard_prompt_feature.flatten(0,1).size(0)).to(label.device), margin=0.0) + + else: + return self.ce_weight * logits + (1 - self.ce_weight) * logits_point + + def project_space(self, z_query, z_support): + # Work on support vectors + # the shape of z_support is [n_classes, n_support, n_dim] + #come half, trans float() for inverse, + z_support = z_support.float() + z_query = z_query.float() + + # use svd or not to calculate the projection + if self.use_svd: + + z_support = z_support.permute(0,2,1) #n_classes n_dim n_support + + try:# avoid dependency between support vectors + u, s, v = torch.linalg.svd(z_support, full_matrices=False) + except: + u, s, v = torch.linalg.svd(z_support + 1e-4 * torch.randn_like(z_support),full_matrices=False) + z_support = u + # Work on query vectors + # N_0 maybe the number of images or the number of hard prompts embedding + # z_query [N_0 n_classes n_dim] + # n_classes, n_support, n_dim * n_classes, n_dim, N_0 = n_classes, n_support, N_0 + self.beta_hat = torch.matmul(z_support.transpose(1,2), z_query.permute(1,2,0)) + z_lrc = torch.matmul(z_support,self.beta_hat) + return z_lrc.permute(2,0,1) + + else: #use least square to calculate the projection + try:# avoid dependency between support vectors + z_supports_inv = torch.matmul(torch.linalg.inv( + torch.matmul(z_support, z_support.transpose(1, 2))), z_support)# n_classes, n_support, n_dim + except: + z_supports_inv = torch.matmul(torch.linalg.inv( + torch.matmul(z_support, z_support.transpose(1, 2)) + 1e-4 * torch.eye( #n_classes, n_support, n_support + z_support.shape[1],).cuda().repeat(z_support.shape[0], 1, 1)), z_support)# n_classes, n_support, n_dim + + beta_hat = torch.matmul(z_supports_inv, z_query.permute(1, 2, 0)) # [n_classes, n_support, n_dim] * [n_classes, n_dim, N_0] = [n_classes, n_support, N_0] + z_lrc = torch.matmul(z_support.transpose(1, 2), beta_hat) # [n_classes, n_dim, n_support] * [n_classes, n_support, N_0] = n_classes, n_dim, T + + return z_lrc.permute(2,0,1) + + +class CustomCLIP_Ens(nn.Module): + def __init__(self, cfg, classnames, templates, all_classnames, ensemble_num): + super().__init__() + self.ensemble_num = ensemble_num + # distribute templates to each model + split_templates = [templates[i::ensemble_num] for i in range(ensemble_num)] + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + + #calculate the max length of class names for concatenation + all_classnames = [name.replace("_", " ") for name in all_classnames] + max_name_len = max([len(_tokenizer.encode(name)) for name in all_classnames]) + 2 #'. 
EOS' + + ens_clip_model = [load_clip_to_cpu(cfg,False, max_name_len=max_name_len) for _ in range(ensemble_num)] #replace new VPT for ensemble model + if cfg.TRAINER.SUPR.PREC == "fp32" or cfg.TRAINER.SUPR.PREC == "amp": + # CLIP's default precision is fp16 + ens_clip_model = [clip_model.float() for clip_model in ens_clip_model] + + # share the frozen parameters for all models + for i in range(1,ensemble_num): + for name, param in ens_clip_model[i].named_parameters(): + if "VPT" not in name: + module = ens_clip_model[i] + module_shared = ens_clip_model[0] + modules = name.split('.') + if len(modules)>1: + for module_name in modules[:-1]: + module = getattr(module, module_name) + module_shared = getattr(module_shared, module_name) + module_shared = getattr(module_shared, modules[-1]) + setattr(module, modules[-1], module_shared) + + self.ensemble_model = nn.ModuleList([CustomCLIP(cfg, classnames, ens_clip_model[i], split_templates[i]) + for i in range(ensemble_num)]) + + def forward(self, image, label=None): + results = [model(image, label) if label is not None else model(image) + for model in self.ensemble_model] + if label is not None: + stacked_results = [ + torch.stack([r[i] for r in results]).mean(0) + for i in range(len(results[0])) + ] + return tuple(stacked_results) + return torch.stack(results).mean(0) + +@TRAINER_REGISTRY.register() +class SuPrEns(TrainerX): + """Supspace Prompting with Ensemble + """ + def check_cfg(self, cfg): + assert cfg.TRAINER.SUPR.PREC in ["fp16", "fp32", "amp"] + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print("Building custom CLIP") + with open(cfg.TRAINER.SUPR.HARD_PROMPT_PATH + 'genertic_templates.yaml', 'r') as file: + genertic_hard_prompt = yaml.load(file, Loader=yaml.FullLoader) + templates = genertic_hard_prompt #+ specific_hard_pormpt + + assert cfg.TRAINER.SUPR.ENSEMBLE_NUM>1, f"Ensemble number should >1, 1 for SuPr, else for SuPr-Ens" + self.model = CustomCLIP_Ens(cfg, classnames, templates, + self.dm.dataset.all_classnames,cfg.TRAINER.SUPR.ENSEMBLE_NUM) + + + print("Turning off gradients in both the image and the text encoder") + name_to_update = "prompt_learner" + + for name, param in self.model.named_parameters(): + if name_to_update not in name: + # Make sure that VPT prompts are updated + if "VPT" in name: + param.requires_grad_(True) + else: + param.requires_grad_(False) + else: + if "ZS_image_encoder" in name: + param.requires_grad_(False) + + # Double check + enabled = set() + for name, param in self.model.named_parameters(): + if param.requires_grad: + enabled.add(name) + print(f"Parameters to be updated: {enabled}") + + + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model, cfg.MODEL.INIT_WEIGHTS) + + + #### + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + # However, IMAGENET need multiple GPU for 1000 class text prompt (3090TI) + self.device = torch.device("cuda:0") + self.device1 = torch.device("cuda") + self.model.to(self.device) + for ensemble_model in self.model.ensemble_model: + ensemble_model.text_encoder=nn.DataParallel(ensemble_model.text_encoder.to(self.device1)) + + + + # NOTE: only give prompt_learner to the optimizer + + self.optim = build_optimizer(self.model, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("SubspacePromptLearner", self.model, self.optim, self.sched) + + self.scaler = GradScaler() if cfg.TRAINER.SUPR.PREC == "amp" else None + 
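# ---------------------------------------------------------------------------
# Editor's note: the snippet below is an illustrative sketch, not part of the
# patch above. It restates, in isolation, the least-squares branch of
# CustomCLIP.project_space(): a query feature is projected onto the subspace
# spanned by each class's prompt-derived "support" features, and the 1e-4
# ridge term mirrors the fallback the code uses when the Gram matrix is
# near-singular. The helper name and the toy tensor sizes are the editor's own.
import torch

def project_onto_class_subspaces(z_query, z_support, ridge=1e-4):
    """z_query: (N, C, D) queries repeated per class; z_support: (C, S, D) per-class bases."""
    gram = torch.matmul(z_support, z_support.transpose(1, 2))            # (C, S, S)
    gram = gram + ridge * torch.eye(gram.shape[-1]).expand_as(gram)      # keep the Gram matrix invertible
    z_support_inv = torch.matmul(torch.linalg.inv(gram), z_support)      # (C, S, D)
    beta_hat = torch.matmul(z_support_inv, z_query.permute(1, 2, 0))     # (C, S, N) regression coefficients
    z_proj = torch.matmul(z_support.transpose(1, 2), beta_hat)           # (C, D, N) projected queries
    return z_proj.permute(2, 0, 1)                                       # (N, C, D)

# toy usage: 2 queries, 3 classes, 4 support vectors per class, 8-dim features
if __name__ == "__main__":
    z_support = torch.randn(3, 4, 8)
    z_query = torch.randn(2, 1, 8).expand(-1, 3, -1)
    print(project_onto_class_subspaces(z_query, z_support).shape)        # torch.Size([2, 3, 8])
# ---------------------------------------------------------------------------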
+ + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + + model = self.model + optim = self.optim + scaler = self.scaler + + prec = self.cfg.TRAINER.SUPR.PREC + if prec == "amp": + with autocast(): + output, loss_ce, loss_ce_point, loss_hard_reg = model(image, label) + loss = self.cfg.TRAINER.SUPR.LAMBDA * loss_ce + (1 - self.cfg.TRAINER.SUPR.LAMBDA) * loss_ce_point \ + + loss_hard_reg * self.cfg.TRAINER.SUPR.REG_LOSS_WEIGHT + optim.zero_grad() + scaler.scale(loss).backward() + scaler.unscale_(optim) + # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0) + scaler.step(optim) + scaler.update() + else: + output, loss_ce, loss_ce_point, loss_hard_reg = model(image, label) + loss = self.cfg.TRAINER.SUPR.LAMBDA * loss_ce + (1 - self.cfg.TRAINER.SUPR.LAMBDA) * loss_ce_point \ + + loss_hard_reg * self.cfg.TRAINER.SUPR.REG_LOSS_WEIGHT + self.model_backward_and_update(loss) + + + loss_summary = { + "loss": loss.item(), + "acc": compute_accuracy(output, label)[0].item(), + "loss_ce": loss_ce.item(), + "loss_ce_point": loss_ce_point.item(), + "loss_hard_reg": loss_hard_reg.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model is given") + return + + names = self.get_model_names() + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + + for s in state_dict.keys(): + if "token_prefix" in s: + print(s) + + # Ignore fixed token vectors + for i in range(50): + if "ensemble_model."+str(i)+".prompt_learner.token_prefix" in state_dict: + del state_dict["ensemble_model."+str(i)+".prompt_learner.token_prefix"] + if "ensemble_model."+str(i)+".prompt_learner.token_suffix" in state_dict: + del state_dict["ensemble_model."+str(i)+".prompt_learner.token_suffix"] + + + print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) + # set strict=False + self._models[name].load_state_dict(state_dict, strict=False) + + diff --git a/trainers/supr_ens_old.py b/trainers/supr_ens_old.py new file mode 100644 index 0000000000000000000000000000000000000000..1364c0bce16bde9bcca777f121f75f311d097ff5 --- /dev/null +++ b/trainers/supr_ens_old.py @@ -0,0 +1,561 @@ +import os.path as osp +import os + + +import numpy as np +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast + +from torch import linalg as LA +import random +from tqdm import tqdm +import yaml +import copy + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_optimizer, build_lr_scheduler + +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer +from clip.model import ResidualAttentionBlock_SuPr + +_tokenizer = 
_Tokenizer() + + +def load_clip_to_cpu(cfg, zero_shot_model=False, max_name_len=6): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + if not zero_shot_model: + design_details = {"trainer": cfg.TRAINER.SUPR.TRAINER_BACKBONE, + "vision_depth": cfg.TRAINER.SUPR.PROMPT_DEPTH_VISION, + "language_depth": cfg.TRAINER.SUPR.PROMPT_DEPTH_TEXT, + "vision_ctx": cfg.TRAINER.SUPR.N_CTX_VISION, + "language_ctx": cfg.TRAINER.SUPR.N_CTX_TEXT, + "space_dim": cfg.TRAINER.SUPR.SPACE_DIM, + "max_name_len": max_name_len} + else: + design_details = {"trainer": 'IVLP', + "vision_depth": 0, + "language_depth": 0, + "vision_ctx": 0, + "language_ctx": 0} + model = clip.build_model(state_dict or model.state_dict(), design_details) + + return model + + +class TextEncoder(nn.Module): + def __init__(self, clip_model): + super().__init__() + self.transformer = clip_model.transformer + self.positional_embedding = clip_model.positional_embedding + self.ln_final = clip_model.ln_final + self.text_projection = clip_model.text_projection + self.dtype = clip_model.dtype + + + def forward(self, prompts, tokenized_prompts): + #(n_cls, space_dim + 1, n_ctx, dim) + x = prompts + self.positional_embedding.type(self.dtype) + n_cls, s, n_ctx, dim, = x.size() + + x = self.transformer(x) + x = self.ln_final(x).type(self.dtype) + # take features from the eot embedding (eot_token is the highest number in each sequence) + eot = tokenized_prompts.argmax(dim=-1).view(n_cls, 1, 1, 1).expand(n_cls, s, 1, dim) + x = torch.gather(x, dim=2, index=eot) @ self.text_projection + + return x.squeeze(2) + + + +class SubspacePromptLearner(nn.Module): + def __init__(self, cfg, classnames, clip_model, templates, all_classnames): + super().__init__() + n_cls = len(classnames) + n_ctx = cfg.TRAINER.SUPR.N_CTX_TEXT + space_dim = cfg.TRAINER.SUPR.SPACE_DIM + ctx_init = cfg.TRAINER.SUPR.CTX_INIT + dtype = clip_model.dtype + ctx_dim = clip_model.ln_final.weight.shape[0] + clip_imsize = clip_model.visual.input_resolution + cfg_imsize = cfg.INPUT.SIZE[0] + assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + assert cfg.TRAINER.SUPR.PROMPT_DEPTH_TEXT >= 0, "For SuPr, PROMPT_DEPTH should be >= 1, 1 is shallow prompting" + self.text_prompts_depth = cfg.TRAINER.SUPR.PROMPT_DEPTH_TEXT # max=12, but will create 11 such shared prompts + self.vision_prompts_depth = cfg.TRAINER.SUPR.PROMPT_DEPTH_VISION # max=12, but will create 11 such shared prompts + + + if ctx_init: + # use given words to initialize context vectors + ctx_init = ctx_init.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] + self.ctx = nn.Parameter(ctx_vectors) + prompt_prefix = ctx_init + + else: + # random initialization + self.ctx = nn.Parameter(torch.empty(n_ctx, ctx_dim, dtype=dtype)) + nn.init.normal_(self.ctx, std=0.02) + prompt_prefix = " ".join(["X"] * n_ctx) + + print(f'Initial context: "{prompt_prefix}"') + print(f"Number of context words (tokens): {n_ctx}") + + classnames = [name.replace("_", " ") for name in classnames] + name_lens = [len(_tokenizer.encode(name)) for name in classnames] + prompts = [prompt_prefix 
+ " " + name + "." for name in classnames] + + + tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) + with torch.no_grad(): + embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) + + max_name_len = max([len(_tokenizer.encode(name)) for name in all_classnames]) + 2 #'.EOS' + self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx :, :]) # CLS, EOS + + hard_prompt_feature = [] + + clip_model_temp = load_clip_to_cpu(cfg, True).float().cuda() + for temp in templates: + prompts = [temp.format(c.replace("_", " ")) for c in classnames] + prompts = torch.cat([clip.tokenize(p) for p in prompts]) + prompts = prompts.to(torch.device("cuda")) + + with torch.no_grad(): + text_features = clip_model_temp.encode_text(prompts) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + hard_prompt_feature.append(text_features.clone().detach()) + # hard_prompt_feature.append(torch.stack(hard_prompt_feature, dim=0).mean(dim=0)) + + + + self.n_cls = n_cls + self.n_ctx = n_ctx + self.space_dim = space_dim + self.tokenized_prompts = tokenized_prompts # torch.Tensor + + self.name_lens = name_lens + self.max_name_len = max_name_len + self.hard_prompt_feature = torch.stack(hard_prompt_feature) + + + + + with open(cfg.TRAINER.SUPR.HARD_PROMPT_PATH + 'init.yaml', 'r') as file: + space_init = yaml.load(file, Loader=yaml.FullLoader) + + self.scale = nn.ParameterList([nn.Parameter(torch.zeros(n_ctx, ctx_dim)) for _ in range(space_dim)]) + # self.bias = nn.ParameterList([nn.Parameter(torch.zeros(ctx_dim)) for _ in range(space_dim)]) + self.bias = nn.ParameterList([]) + + for i in range(space_dim): + ctx_init = space_init[i] + ctx_init = ctx_init.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] + self.bias.append(nn.Parameter(ctx_vectors)) + + + # for single_para in self.bias: + # nn.init.normal_(single_para, std=0.02) + + + def construct_prompts(self, ctx, prefix, suffix, label=None): + # dim0 is either batch_size (during training) or n_cls (during testing) + # ctx: context tokens, with shape of (dim0, n_ctx, ctx_dim) + # prefix: the sos token, with shape of (n_cls, 1, ctx_dim) + # suffix: remaining tokens, with shape of (n_cls, *, ctx_dim) + + if label is not None: + prefix = prefix[label] + suffix = suffix[label] + VPT = [ctx] + for i in range(self.space_dim): + scale = self.scale[i].unsqueeze(0).half() + bias = self.bias[i].unsqueeze(0).half() + + # t = scale * VPT[0] + # t = t + bias + t = self.bias[i].unsqueeze(0).expand(self.n_cls, -1, -1) + VPT.append(t) + ctx = torch.stack(VPT,dim=1) + prompts = torch.cat( + [ + prefix, # (n_cls, space_dim+1, 1, dim) + ctx, # (n_cls, space_dim+1, n_ctx, dim) + suffix, # (n_cls, space_dim+1, *, dim) + ], + dim=2, + ) + + return prompts + + def forward(self): + ctx = self.ctx.unsqueeze(0).expand(self.n_cls, -1, -1) + prefix = self.token_prefix.unsqueeze(1).expand(-1, self.space_dim + 1, -1, -1) + suffix = self.token_suffix.unsqueeze(1).expand(-1, self.space_dim + 1, -1, -1) + + prompts = self.construct_prompts(ctx, prefix, suffix) + + return prompts # pass here original, as for visual 768 is required + + +class CustomCLIP(nn.Module): + def __init__(self, cfg, classnames, clip_model, templates, all_classnames,): + super().__init__() + + self.prompt_learner = SubspacePromptLearner(cfg, 
classnames, clip_model, templates, all_classnames) + self.tokenized_prompts = self.prompt_learner.tokenized_prompts + + self.image_encoder = clip_model.visual + self.text_encoder = TextEncoder(clip_model) + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + + self.space_dim = cfg.TRAINER.SUPR.SPACE_DIM + self.ce_weight = cfg.TRAINER.SUPR.LAMBDA + self.use_svd = cfg.TRAINER.SUPR.SVD + self.device = torch.device("cuda:0") + self.device1 = torch.device("cuda") + + + def forward(self, image, label=None): + tokenized_prompts = self.tokenized_prompts + + logit_scale = self.logit_scale.exp() + + prompts = self.prompt_learner() #(n_cls * space_dim + 1, n_ctx, dim) + text_features = self.text_encoder(prompts, tokenized_prompts)#(n_cls, space_dim + 1, n_ctx, dim) + image_features = self.image_encoder(image.type(self.dtype)) + + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + text_feature_point = text_features[:,0,:] + text_features = text_features[:,1:,:] + projected_image_feature = self.project_space(image_features.unsqueeze(1).expand(-1, self.prompt_learner.n_cls, -1),text_features) # n_query n_classes n_dim + + + cos_sim = torch.nn.CosineSimilarity(dim=2,eps=1e-07) + logits = logit_scale * cos_sim(image_features.unsqueeze(1).float(),projected_image_feature) + logits_point = logit_scale * image_features @ text_feature_point.t() + + + + if self.prompt_learner.training: + hard_prompt_feature = self.prompt_learner.hard_prompt_feature # T C D + projected_hardtext_feature = self.project_space(hard_prompt_feature, text_features) + + + + + return logits, F.cross_entropy(logits, label), \ + F.cross_entropy(logits_point, label), \ + F.cosine_embedding_loss(hard_prompt_feature.flatten(0,1), projected_hardtext_feature.flatten(0,1), + torch.ones(hard_prompt_feature.flatten(0,1).size(0)).to(self.device), margin=0.0) + # F.cosine_embedding_loss(hard_feature, hard_projected_feature, + # torch.ones(hard_feature.size(0)).to(self.device), margin=0.0) + + # F.cosine_embedding_loss(zero_projected_image_feature, zero_shot_features, + # torch.ones(zero_shot_features.size(0)).to(self.device), margin=0.0) + + else: + return self.ce_weight * logits + (1 - self.ce_weight) * logits_point + + + def project_space_svd(self, z_query, z_support): + + # Work on support vectors + z_support = z_support.float() + z_query = z_query.float() + #z_support C N D + + z_support = z_support.permute(0,2,1) #C D N + + # u, s, v = torch.svd(z_support.float(), some=True) + # try: + u, s, v = torch.linalg.svd(z_support, full_matrices=False) + # except: + # with torch.no_grad(): + # z_mean = z_support.mean() + # u, s, v = torch.linalg.svd(z_support + 1e-3 * z_mean * torch.randn_like(z_support),full_matrices=False) + + z_support = u + + + # Work on query vectors + # z_query [template n_classes n_dim] + self.beta_hat = torch.matmul(z_support.transpose(1,2), z_query.permute(1,2,0)) # ? C N D * C D B = C N B + z_lrc = torch.matmul(z_support,self.beta_hat) # ? 
C D N * C N B = C D B + return z_lrc.permute(2,0,1) + + def project_space(self, z_query, z_support): + z_support = z_support.float() + z_query = z_query.float() + + if self.use_svd: + #z_support C N D + z_support = z_support.permute(0,2,1) #C D N + + # u, s, v = torch.svd(z_support.float(), some=True) + # try: + u, s, v = torch.linalg.svd(z_support, full_matrices=False) + # except: + # with torch.no_grad(): + # z_mean = z_support.mean() + # u, s, v = torch.linalg.svd(z_support + 1e-3 * z_mean * torch.randn_like(z_support),full_matrices=False) + z_support = u + # Work on query vectors + # z_query [template n_classes n_dim] + self.beta_hat = torch.matmul(z_support.transpose(1,2), z_query.permute(1,2,0)) # ? C N D * C D B = C N B + z_lrc = torch.matmul(z_support,self.beta_hat) # ? C D N * C N B = C D B + return z_lrc.permute(2,0,1) + else: + # Work on support vectors + # the shape of z_support is [n_classes, n_support, n_dim] + + # Batched inverses :), in PyTorch > 0.4.1 + # Need regularization to ensure matrix inverse is possible to compute + # try: + z_supports_inv = torch.matmul(torch.linalg.inv( + torch.matmul(z_support, z_support.transpose(1, 2))), z_support)# n_classes, n_support, n_dim + # except: + # z_supports_inv = torch.matmul(torch.linalg.inv( + # torch.matmul(z_support, z_support.transpose(1, 2)) + 1e-4 * torch.eye( #n_classes, n_support, n_support + # z_support.shape[1],).cuda().repeat(z_support.shape[0], 1, 1)), z_support)# n_classes, n_support, n_dim + + # Work on query vectors + # z_query [template n_classes n_dim] + + beta_hat = torch.matmul(z_supports_inv, z_query.permute(1, 2, 0)) # ? [n_classes, n_support, T] ([n_classes, n_support, n_dim] * [n_classes, n_dim, T]) + z_lrc = torch.matmul(z_support.transpose(1, 2), beta_hat) # n_classes, n_dim, T ([n_classes, n_dim, n_support] * [n_classes, n_support, T]) + + # z_lrc = z_lrc.half() + return z_lrc.permute(2,0,1) + + +class CustomCLIP_Ens(nn.Module): + def __init__(self, cfg, classnames, templates, all_classnames, ensemble_num): + super().__init__() + self.ensemble_num = ensemble_num + split_templates = [templates[i::ensemble_num] for i in range(ensemble_num)] + # n = len(lst) + # [lst[i * n // k:(i + 1) * n // k] for i in range(k)] + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + all_classnames = [name.replace("_", " ") for name in all_classnames] + max_name_len = max([len(_tokenizer.encode(name)) for name in all_classnames]) + 2 #'. 
EOS' + ens_clip_model = [load_clip_to_cpu(cfg,False, max_name_len=max_name_len) for _ in range(ensemble_num)] # each ensemble model gets its own VPT prompts + if cfg.TRAINER.SUPR.PREC == "fp32" or cfg.TRAINER.SUPR.PREC == "amp": + # CLIP's default precision is fp16 + ens_clip_model = [clip_model.float() for clip_model in ens_clip_model] + + + for i in range(1,ensemble_num): + for name, param in ens_clip_model[i].named_parameters(): + if "VPT" not in name: + module = ens_clip_model[i] + module_shared = ens_clip_model[0] + modules = name.split('.') + if len(modules)>1: + for module_name in modules[:-1]: + module = getattr(module, module_name) + module_shared = getattr(module_shared, module_name) + # finally point this attribute at the shared module via setattr + module_shared = getattr(module_shared, modules[-1]) + setattr(module, modules[-1], module_shared) + + + + self.ensemble_model = nn.ModuleList([CustomCLIP(cfg, classnames, ens_clip_model[i], split_templates[i], all_classnames) + for i in range(ensemble_num)]) + + def forward(self, image, label=None): + results = [model(image, label) if label is not None else model(image) + for model in self.ensemble_model] + if label is not None: + stacked_results = [ + torch.stack([r[i] for r in results]).mean(0) + for i in range(len(results[0])) + ] + return tuple(stacked_results) + return torch.stack(results).mean(0) + +@TRAINER_REGISTRY.register() +class SuPrEns(TrainerX): + """Subspace Prompting. + """ + def check_cfg(self, cfg): + assert cfg.TRAINER.SUPR.PREC in ["fp16", "fp32", "amp"] + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print("Building custom CLIP") + with open(cfg.TRAINER.SUPR.HARD_PROMPT_PATH + 'genertic_templates.yaml', 'r') as file: + genertic_hard_prompt = yaml.load(file, Loader=yaml.FullLoader) + templates = genertic_hard_prompt #+ specific_hard_pormpt + + assert cfg.TRAINER.SUPR.ENSEMBLE_NUM>=1, f"Ensemble number should be >= 1: 1 for SuPr, >1 for SuPr-Ens" + self.model = CustomCLIP_Ens(cfg, classnames, templates, + self.dm.dataset.all_classnames,cfg.TRAINER.SUPR.ENSEMBLE_NUM) + + + print("Turning off gradients in both the image and the text encoder") + name_to_update = "prompt_learner" + + for name, param in self.model.named_parameters(): + if name_to_update not in name: + # Make sure that VPT prompts are updated + if "VPT" in name: + param.requires_grad_(True) + else: + param.requires_grad_(False) + else: + if "ZS_image_encoder" in name: + param.requires_grad_(False) + + # Double check + enabled = set() + for name, param in self.model.named_parameters(): + if param.requires_grad: + enabled.add(name) + print(f"Parameters to be updated: {enabled}") + + + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model, cfg.MODEL.INIT_WEIGHTS) + + + #### + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + # However, ImageNet needs multiple GPUs for its 1000-class text prompts (3090 Ti) + self.device = torch.device("cuda:0") + self.device1 = torch.device("cuda") + self.model.to(self.device) + for ensemble_model in self.model.ensemble_model: + ensemble_model.text_encoder=nn.DataParallel(ensemble_model.text_encoder.to(self.device1)) + + + + # NOTE: only give prompt_learner to the optimizer + + self.optim = build_optimizer(self.model, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("SubspacePromptLearner", self.model, self.optim, self.sched) + + self.scaler = GradScaler() if cfg.TRAINER.SUPR.PREC == "amp" else None + + + def
forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + + model = self.model + optim = self.optim + scaler = self.scaler + + prec = self.cfg.TRAINER.SUPR.PREC + if prec == "amp": + with autocast(): + output, loss_ce, loss_hard_reg = model(image, label) + loss = loss_ce + loss_hard_reg * self.cfg.TRAINER.SUPR.REG_LOSS_WEIGHT + optim.zero_grad() + scaler.scale(loss).backward() + scaler.unscale_(optim) + # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0) + scaler.step(optim) + scaler.update() + else: + output, loss_ce, loss_ce_point, loss_hard_reg = model(image, label) + loss = self.cfg.TRAINER.SUPR.LAMBDA * loss_ce + (1 - self.cfg.TRAINER.SUPR.LAMBDA) * loss_ce_point \ + + loss_hard_reg * self.cfg.TRAINER.SUPR.REG_LOSS_WEIGHT + self.model_backward_and_update(loss) + + + loss_summary = { + "loss": loss.item(), + "acc": compute_accuracy(output, label)[0].item(), + "loss_ce": loss_ce.item(), + "loss_ce_point": loss_ce_point.item(), + "loss_hard_reg": loss_hard_reg.item(), + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + + return loss_summary + + def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model is given") + return + + names = self.get_model_names() + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + + + for s in state_dict.keys(): + if "token_prefix" in s: + print(s) + + # Ignore fixed token vectors + for i in range(50): + if "ensemble_model."+str(i)+".prompt_learner.token_prefix" in state_dict: + del state_dict["ensemble_model."+str(i)+".prompt_learner.token_prefix"] + if "ensemble_model."+str(i)+".prompt_learner.token_suffix" in state_dict: + del state_dict["ensemble_model."+str(i)+".prompt_learner.token_suffix"] + + + print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) + # set strict=False + self._models[name].load_state_dict(state_dict, strict=False) + + + diff --git a/trainers/supr_promptsrc.py b/trainers/supr_promptsrc.py new file mode 100644 index 0000000000000000000000000000000000000000..3666538f6c649dff8a95306b515c3e3b6b3ca1fe --- /dev/null +++ b/trainers/supr_promptsrc.py @@ -0,0 +1,365 @@ +import copy +import os.path as osp +import numpy as np +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast +import yaml + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_optimizer, build_lr_scheduler +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer +from .imagenet_templates import IMAGENET_TEMPLATES + +from .promptsrc import PromptSRC +from .promptsrc import CustomCLIP as CustomCLIP_ +from .promptsrc import VLPromptLearner as VLPromptLearner_ +from .supr import load_clip_to_cpu +from .supr import TextEncoder 
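# ---------------------------------------------------------------------------
# Editor's note: the snippet below is an illustrative sketch, not part of the
# patch. It isolates the Gaussian prompt aggregation (GPA) weighting that the
# imported PromptSRC trainer sets up in build_model(): every epoch in
# [1, MAX_EPOCH] receives a Gaussian weight centred at GPA_MEAN with spread
# GPA_STD, the weights are normalised to sum to 1, and the running weighted
# average of per-epoch state dicts is loaded for final inference. The numeric
# values in the usage line are placeholders, not the repo's defaults.
import numpy as np

def gpa_epoch_weights(max_epoch, mean, std):
    gauss = lambda x: np.exp(-0.5 * ((x - mean) / std) ** 2) / (std * np.sqrt(2 * np.pi))
    weights = np.array([gauss(e) for e in range(1, max_epoch + 1)])
    return weights / weights.sum()  # normalised, so the aggregated model is a convex combination

# toy usage: with mean=15, std=1 the weights concentrate on the late epochs
if __name__ == "__main__":
    w = gpa_epoch_weights(max_epoch=20, mean=15, std=1)
    print(np.round(w, 3), w.sum())  # peaks around epoch 15, sums to 1.0
# ---------------------------------------------------------------------------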
+_tokenizer = _Tokenizer() + + + + +class VLPromptLearner(VLPromptLearner_): + def __init__(self, cfg, classnames, clip_model, templates): + super().__init__(cfg, classnames, clip_model) + dtype = clip_model.dtype + ctx_dim = clip_model.ln_final.weight.shape[0] + + #Prepare space prompts for SuPr + space_dim = cfg.TRAINER.SUPR.SPACE_DIM + if cfg.TRAINER.PROMPTSRC.CTX_INIT: + with open(cfg.TRAINER.SUPR.HARD_PROMPT_PATH + 'init.yaml', 'r') as file: + space_init = yaml.load(file, Loader=yaml.FullLoader) + self.ctx_space = nn.ParameterList([]) + for i in range(space_dim): + ctx_init = space_init[i] + ctx_init = ctx_init.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] + self.ctx_space.append(nn.Parameter(ctx_vectors)) + else: + # random initialization + self.ctx_space = nn.ParameterList([nn.Parameter(torch.empty(n_ctx, ctx_dim, dtype=dtype)) + for _ in range(space_dim)]) + for single_para in self.ctx_space: + nn.init.normal_(single_para, std=0.02) + + # Prepare the hard prompt embeddings + hard_prompt_feature = [] + + clip_model_temp = load_clip_to_cpu(cfg, True).float().cuda() + for temp in templates: + prompts = [temp.format(c.replace("_", " ")) for c in classnames] + prompts = torch.cat([clip.tokenize(p) for p in prompts]) + prompts = prompts.to(torch.device("cuda")) + + with torch.no_grad(): + text_features = clip_model_temp.encode_text(prompts) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + hard_prompt_feature.append(text_features.clone().detach()) + + self.space_dim = space_dim + self.hard_prompt_feature = torch.stack(hard_prompt_feature) + + def construct_prompts(self, ctx, prefix, suffix, label=None): + # dim0 is either batch_size (during training) or n_cls (during testing) + # ctx: context tokens, with shape of (dim0, space_dim, n_ctx, ctx_dim) + # prefix: the sos token, with shape of (n_cls, space_dim, 1, ctx_dim) + # suffix: remaining tokens, with shape of (n_cls, space_dim, *, ctx_dim) + + if label is not None: + prefix = prefix[label] + suffix = suffix[label] + + + all_ctx = [ctx] + for i in range(self.space_dim): + all_ctx.append(self.ctx_space[i].unsqueeze(0).expand(self.n_cls, -1, -1)) + ctx = torch.stack(all_ctx, dim=1) + + + prompts = torch.cat( + [ + prefix, # (n_cls, space_dim+1, 1, dim) + ctx, # (n_cls, space_dim+1, n_ctx, dim) + suffix, # (n_cls, space_dim+1, *, dim) + ], + dim=2, + ) + + return prompts + + def forward(self): + ctx = self.ctx.unsqueeze(0).expand(self.n_cls, -1, -1) + prefix = self.token_prefix.unsqueeze(1).expand(-1, self.space_dim + 1, -1, -1) + suffix = self.token_suffix.unsqueeze(1).expand(-1, self.space_dim + 1, -1, -1) + + prompts = self.construct_prompts(ctx, prefix, suffix) + + return prompts + + + +class CustomCLIP(CustomCLIP_): + def __init__(self, cfg, classnames, clip_model, templates): + super().__init__(cfg, classnames, clip_model) + self.prompt_learner = VLPromptLearner(cfg, classnames, clip_model, templates) + self.text_encoder = TextEncoder(clip_model) + + self.space_dim = cfg.TRAINER.SUPR.SPACE_DIM + self.ce_weight = cfg.TRAINER.SUPR.LAMBDA + self.use_svd = cfg.TRAINER.SUPR.SVD + + + def forward(self, image, label=None): + tokenized_prompts = self.tokenized_prompts + logit_scale = self.logit_scale.exp() + + # Compute the prompted image and text features + prompts = self.prompt_learner() #(n_cls * space_dim+1, n_ctx, dim) + text_features = 
self.text_encoder(prompts, tokenized_prompts)#(n_cls, n_ctx, dim) + image_features = self.image_encoder(image.type(self.dtype)) + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + text_features_space = text_features[:,1:,:] + text_features = text_features[:,0,:] + # Compute the prompted logits (PromptSRC) + logits = logit_scale * image_features @ text_features.t() + + #SuPr part + projected_image_feature = self.project_space(image_features.unsqueeze(1).expand(-1, self.prompt_learner.n_cls, -1),\ + text_features_space) # n_query n_classes n_dim + cos_sim = torch.nn.CosineSimilarity(dim=2,eps=1e-07) + # Compute the space logits (SuPr) + logits_space = logit_scale * cos_sim(image_features.unsqueeze(1).float(),projected_image_feature) + + + if self.prompt_learner.training: + # Now calculate the frozen pre-trained features + fixed_embeddings = self.prompt_learner.fixed_embeddings # precomputed pre-trained frozen textual features + fixed_embeddings = fixed_embeddings / fixed_embeddings.norm(dim=-1, keepdim=True) + with torch.no_grad(): + zero_shot_features = self.prompt_learner.ZS_image_encoder(image.type(self.dtype)) + zero_shot_features = zero_shot_features / zero_shot_features.norm(dim=-1, keepdim=True) + # Compute pre-trained frozen visual features + zero_shot_logits = logit_scale * zero_shot_features.cuda() @ fixed_embeddings.half().cuda().t() + + #SuPr part + hard_prompt_feature = self.prompt_learner.hard_prompt_feature # T C D + projected_hardtext_feature = self.project_space(hard_prompt_feature, text_features_space) + + + return F.cross_entropy(logits,label), text_features, fixed_embeddings, zero_shot_features, \ + image_features, zero_shot_logits, logits, logits_space, \ + F.cross_entropy(logits_space, label), \ + F.cosine_embedding_loss(hard_prompt_feature.flatten(0,1), projected_hardtext_feature.flatten(0,1), + torch.ones(hard_prompt_feature.flatten(0,1).size(0)).to(label.device), margin=0.0), \ + + + else: + return self.ce_weight * logits_space + (1 - self.ce_weight) * logits + + def project_space(self, z_query, z_support): + # Work on support vectors + # the shape of z_support is [n_classes, n_support, n_dim] + #come half, trans float() for inverse, + z_support = z_support.float() + z_query = z_query.float() + + # use svd or not to calculate the projection + if self.use_svd: + + z_support = z_support.permute(0,2,1) #n_classes n_dim n_support + + try:# avoid dependency between support vectors + u, s, v = torch.linalg.svd(z_support, full_matrices=False) + except: + u, s, v = torch.linalg.svd(z_support + 1e-4 * torch.randn_like(z_support),full_matrices=False) + z_support = u + # Work on query vectors + # N_0 maybe the number of images or the number of hard prompts embedding + # z_query [N_0 n_classes n_dim] + # n_classes, n_support, n_dim * n_classes, n_dim, N_0 = n_classes, n_support, N_0 + self.beta_hat = torch.matmul(z_support.transpose(1,2), z_query.permute(1,2,0)) + z_lrc = torch.matmul(z_support,self.beta_hat) + return z_lrc.permute(2,0,1) + + else: #use least square to calculate the projection + try:# avoid dependency between support vectors + z_supports_inv = torch.matmul(torch.linalg.inv( + torch.matmul(z_support, z_support.transpose(1, 2))), z_support)# n_classes, n_support, n_dim + except: + z_supports_inv = torch.matmul(torch.linalg.inv( + torch.matmul(z_support, z_support.transpose(1, 2)) + 1e-4 * torch.eye( #n_classes, n_support, n_support + 
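The SuPr scoring above projects each image feature onto the subspace spanned by a class's space-prompt text features, either through an SVD basis or a least-squares solve. A single-class, single-query sketch of that orthogonal projection, with hypothetical names (the batched `project_space()` in the diff applies the same idea per class):

```
import torch

def project_onto_subspace(query, support):
    # query: (d,) feature; support: (k, d) space-prompt features for one class.
    # Columns of u form an orthonormal basis of the span of the support vectors,
    # so u @ (u.T @ query) is the orthogonal projection of query onto that span.
    u, _, _ = torch.linalg.svd(support.T, full_matrices=False)  # u: (d, k)
    return u @ (u.T @ query)

# toy check: a vector already inside the span projects onto itself
support = torch.randn(3, 8)              # 3 space-prompt features of dim 8
inside = support.T @ torch.randn(3)      # a linear combination of the supports
assert torch.allclose(project_onto_subspace(inside, support), inside, atol=1e-4)
```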
z_support.shape[1],).cuda().repeat(z_support.shape[0], 1, 1)), z_support)# n_classes, n_support, n_dim + + beta_hat = torch.matmul(z_supports_inv, z_query.permute(1, 2, 0)) # [n_classes, n_support, n_dim] * [n_classes, n_dim, N_0] = [n_classes, n_support, N_0] + z_lrc = torch.matmul(z_support.transpose(1, 2), beta_hat) # [n_classes, n_dim, n_support] * [n_classes, n_support, N_0] = n_classes, n_dim, T + + return z_lrc.permute(2,0,1) + + + +@TRAINER_REGISTRY.register() +class SubspacePromptSRC(PromptSRC): + """ + Subspace Prompting for PromptSRC + """ + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + all_classnames = [name.replace("_", " ") for name in self.dm.dataset.all_classnames] + max_name_len = max([len(_tokenizer.encode(name)) for name in all_classnames]) + 2 #'. EOS' + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg,False, max_name_len=max_name_len) + + if cfg.TRAINER.PROMPTSRC.PREC == "fp32" or cfg.TRAINER.PROMPTSRC.PREC == "amp": + # CLIP's default precision is fp16 + clip_model.float() + + with open(cfg.TRAINER.SUPR.HARD_PROMPT_PATH + 'genertic_templates.yaml', 'r') as file: + genertic_hard_prompt = yaml.load(file, Loader=yaml.FullLoader) + templates = genertic_hard_prompt # + + print("Building custom CLIP") + self.model = CustomCLIP(cfg, classnames, clip_model, templates) + + print("Turning off gradients in both the image and the text encoder") + name_to_update = "prompt_learner" + + for name, param in self.model.named_parameters(): + if name_to_update not in name: + # Make sure that VPT prompts are updated + if "VPT" in name: + param.requires_grad_(True) + else: + param.requires_grad_(False) + else: + if "ZS_image_encoder" in name: + param.requires_grad_(False) + + # Double check + enabled = set() + for name, param in self.model.named_parameters(): + if param.requires_grad: + enabled.add(name) + print(f"Parameters to be updated: {enabled}") + print(f"Parameters count: {len(enabled)}") + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model, cfg.MODEL.INIT_WEIGHTS) + + self.model.to(self.device) + # NOTE: only give prompt_learner to the optimizer + self.optim = build_optimizer(self.model, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("VLPromptLearner", self.model, self.optim, self.sched) + + # Cosine scheduler + self.total_epochs = cfg.OPTIM.MAX_EPOCH + self.step_counter = 1 + N = cfg.OPTIM.MAX_EPOCH + mean = cfg.TRAINER.PROMPTSRC.GPA_MEAN + stdev = cfg.TRAINER.PROMPTSRC.GPA_STD + gauss = self.get_gauss(mean, stdev) + self.gauss = np.array([gauss(a) for a in range(1, N + 1)]) + self.gauss = self.gauss / sum(self.gauss) + self.scaler = GradScaler() if cfg.TRAINER.PROMPTSRC.PREC == "amp" else None + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + device_count = torch.cuda.device_count() + if device_count > 1: + print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!") + self.model = nn.DataParallel(self.model) + # Keep model with GPA + self.previous_model_gpa = None + + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + + model = self.model + optim = self.optim + scaler = self.scaler + + prec = self.cfg.TRAINER.PROMPTSRC.PREC + if prec == "amp": + with autocast(): + loss = model(image, label) + optim.zero_grad() + scaler.scale(loss).backward() + scaler.step(optim) + scaler.update() + else: + loss_ce, 
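`build_model()` above precomputes normalised Gaussian weights over the training epochs for Gaussian Prompt Aggregation (GPA). A standalone sketch of that schedule, assuming illustrative values for GPA_MEAN and GPA_STD:

```
import numpy as np

def gpa_epoch_weights(n_epochs, mean, std):
    # Unnormalised Gaussian evaluated at each epoch index, then normalised so
    # the per-epoch aggregation weights sum to 1 (as in build_model() above).
    gauss = lambda x: np.exp(-0.5 * ((x - mean) / std) ** 2)
    w = np.array([gauss(e) for e in range(1, n_epochs + 1)])
    return w / w.sum()

# e.g. 20 epochs, aggregation weight centred on epoch 15
weights = gpa_epoch_weights(20, mean=15, std=2)
print(weights.round(3))   # peaks around epoch 15, sums to 1
```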
normalized_text_features, zs_clip_text_embeddings, zs_image_embedd, image_ft, \ + zero_shot_logits, logits, logits_space, loss_ce_space, loss_hard_reg = model(image, label) + # Calculate the L_SCL_text loss + loss_scl_text = F.l1_loss(normalized_text_features, zs_clip_text_embeddings.cuda(), + reduction='mean') + # Calculate the L_SCL_image loss + loss_scl_image = F.l1_loss(image_ft, zs_image_embedd.cuda(), + reduction='mean') + + # Now calculate L_SCL_logits + L_SCL_logits = F.kl_div( + F.log_softmax(logits / 1, dim=1), + F.log_softmax(zero_shot_logits / 1, dim=1), + reduction='sum', + log_target=True + ) * (1 * 1) / logits.numel() + L_SCL_logits_space = F.kl_div( + F.log_softmax(logits_space / 1, dim=1), + F.log_softmax(zero_shot_logits / 1, dim=1), + reduction='sum', + log_target=True + ) * (1 * 1) / logits.numel() + L_SCL = 0.7 * L_SCL_logits_space + 0.15 * L_SCL_logits + \ + loss_scl_image * self.cfg.TRAINER.PROMPTSRC.IMAGE_LOSS_WEIGHT + \ + loss_scl_text * self.cfg.TRAINER.PROMPTSRC.TEXT_LOSS_WEIGHT + loss = self.cfg.TRAINER.SUPR.LAMBDA * loss_ce_space + \ + (1 - self.cfg.TRAINER.SUPR.LAMBDA) * loss_ce + \ + + loss_hard_reg * self.cfg.TRAINER.SUPR.REG_LOSS_WEIGHT + L_SCL + optim.zero_grad() + loss.backward() + optim.step() + + loss_summary = { + "loss": loss.item(), + "acc": compute_accuracy(logits, label)[0].item(), + "loss_ce_point": loss_ce.item(), + "loss_ce_space": loss_ce_space.item(), + "loss_hard_reg": loss_hard_reg.item(), + "loss_SCL": L_SCL.item() + } + + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + # Means one epoch is completed, perform GPA + self.step_counter = self.step_counter + 1 + current_epoch_weight = self.gauss[self.step_counter - 2] + current_model_weights = copy.deepcopy(model.state_dict()) + weighted_state_dict = self.state_dict_weighting(current_model_weights, current_epoch_weight) + if self.previous_model_gpa is None: + self.previous_model_gpa = weighted_state_dict + else: + self.previous_model_gpa = self.state_dict_add(weighted_state_dict, self.previous_model_gpa) + + if self.step_counter == self.model.total_epochs + 1: + print("Using GPA model for final inference...") + state_dict = self.previous_model_gpa + filtered_state_dict = state_dict + model.load_state_dict(filtered_state_dict, strict=False) + self.model.load_state_dict(filtered_state_dict, strict=False) + + return loss_summary diff --git a/trainers/tcp.py b/trainers/tcp.py new file mode 100644 index 0000000000000000000000000000000000000000..8be3ca8fb9dde8169a098c1934e204f9cd59f031 --- /dev/null +++ b/trainers/tcp.py @@ -0,0 +1,377 @@ +import os.path as osp + +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.cuda.amp import GradScaler, autocast +from collections import OrderedDict +import scipy.io as sio +import numpy as np +import copy + + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.metrics import compute_accuracy +from dassl.utils import load_pretrained_weights, load_checkpoint +from dassl.optim import build_optimizer, build_lr_scheduler + +from clip import clip +from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer +import tqdm + +_tokenizer = _Tokenizer() + + + +def load_clip_to_cpu(cfg): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + + 
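The `L_SCL_logits` terms above are temperature-scaled KL divergences that keep the prompted logits close to the frozen zero-shot CLIP logits. A minimal sketch of one such term (the temperature is fixed at 1 in the code; the helper name is hypothetical):

```
import torch
import torch.nn.functional as F

def logit_consistency_loss(student_logits, teacher_logits, T=1.0):
    # log_target=True because both arguments are log-probabilities;
    # the T*T / numel() scaling matches the normalisation used above.
    return F.kl_div(
        F.log_softmax(student_logits / T, dim=1),
        F.log_softmax(teacher_logits / T, dim=1),
        reduction="sum",
        log_target=True,
    ) * (T * T) / student_logits.numel()

# toy usage: 4 images, 10 classes
student = torch.randn(4, 10)   # prompted logits
teacher = torch.randn(4, 10)   # frozen zero-shot logits
print(logit_consistency_loss(student, teacher).item())
```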
design_details = {"trainer": 'TCP', + "vision_depth": 0, + "language_depth": 0, + "vision_ctx": 0, + "language_ctx": 0} + model = clip.build_model(state_dict or model.state_dict(), design_details) + + return model + + +CUSTOM_TEMPLATES_ori = { + "OxfordPets": "a photo of a {}, a type of pet.", + "OxfordFlowers": "a photo of a {}, a type of flower.", + "FGVCAircraft": "a photo of an aircraft {}.", + "DescribableTextures": "a photo of a {}, a type of texture.", + "EuroSAT": "a centered satellite photo of {}.", + "StanfordCars": "a photo of a {}.", + "Food101": "a photo of a {}, a type of food.", + "SUN397": "a photo of a {}.", + "Caltech101": "a photo of a {}.", + "UCF101": "a photo of a person doing {}.", + "ImageNet": "a photo of a {}.", + "ImageNetSketch": "a photo of a {}.", + "ImageNetV2": "a photo of a {}.", + "ImageNetA": "a photo of a {}.", + "ImageNetR": "a photo of a {}.", +} + +CUSTOM_TEMPLATES = { + "OxfordPets": "X X X X {}, a type of pet.", + "OxfordFlowers": "X X X X {}, a type of flower.", + "FGVCAircraft": "X X X X {}, a type of aircraft.", + "DescribableTextures": "X X X X {} texture.", + "EuroSAT": "X X X X {}.", + "StanfordCars": "X X X X {}, a type of car", + "Food101": "X X X X {}, a type of food.", + "SUN397": "X X X X {}.", + "Caltech101": "X X X X {}.", + "UCF101": "a photo of a person doing {}.", + "ImageNet": "a photo of a {}.", + "ImageNetSketch": "a photo of a {}.", + "ImageNetV2": "a photo of a {}.", + "ImageNetA": "a photo of a {}.", + "ImageNetR": "a photo of a {}.", +} + + + + + +class TextEncoder(nn.Module): + def __init__(self, clip_model): + super().__init__() + self.transformer = clip_model.transformer + self.positional_embedding = clip_model.positional_embedding + self.ln_final = clip_model.ln_final + self.text_projection = clip_model.text_projection + self.dtype = clip_model.dtype + + def forward(self, prompts, class_feature, weight, tokenized_prompts,flag=False): + x = prompts + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + if flag: + x = self.transformer(x) + else: + counter=0 + outputs = self.transformer.resblocks([x,class_feature,weight,counter]) + x = outputs[0] + + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + x = x[torch.arange(x.shape[0]), tokenized_prompts.argmax(dim=-1)] @ self.text_projection + return x + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + +class QuickGELU(nn.Module): + def forward(self, x: torch.Tensor): + return x * torch.sigmoid(1.702 * x) + +class PromptLearner(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + n_cls = len(classnames) + n_ctx = cfg.TRAINER.TCP.N_CTX + ctx_init = cfg.TRAINER.TCP.CTX_INIT + dtype = clip_model.dtype + ctx_dim = clip_model.ln_final.weight.shape[0] + clip_imsize = clip_model.visual.input_resolution + cfg_imsize = cfg.INPUT.SIZE[0] + assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + + if ctx_init: + print("use given words to initialize context vectors") + # temp = 'a photo of a' + ctx_init = ctx_init.replace("_", " ") + n_ctx = len(ctx_init.split(" ")) + prompt = clip.tokenize(ctx_init) + with torch.no_grad(): + embedding = clip_model.token_embedding(prompt).type(dtype) + + ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] + prompt_prefix = ctx_init + + ctx_vectors_src = embedding[0, 1 : 1 + n_ctx, :] + + else: + # random initialization + if cfg.TRAINER.TCP.CSC: + print("Initializing 
class-specific contexts") + ctx_vectors = torch.empty(n_cls, n_ctx, ctx_dim, dtype=dtype) + else: + print("Initializing a generic context") + ctx_vectors = torch.empty(n_ctx, ctx_dim, dtype=dtype) + nn.init.normal_(ctx_vectors, std=0.02) + prompt_prefix = " ".join(["X"] * n_ctx) + + + print(f'Initial context: "{prompt_prefix}"') + print(f"Number of context words (tokens): {n_ctx}") + + self.ctx = nn.Parameter(ctx_vectors) # to be optimized + + clip_model_ = load_clip_to_cpu(cfg) + clip_model_.cuda() + + temp = CUSTOM_TEMPLATES_ori[cfg.DATASET.NAME] + prompts_ = [temp.format(c.replace("_", " ")) for c in classnames] + prompts_ = torch.cat([clip.tokenize(p) for p in prompts_]) + prompts_ = prompts_.cuda() + + with torch.no_grad(): + text_features = clip_model_.encode_text(prompts_) + self.text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + clip_model_.to('cpu') + del(clip_model_) + + vis_dim = clip_model.visual.output_dim + self.meta_net = nn.Sequential( + OrderedDict([("linear1", nn.Linear(vis_dim, vis_dim // 4,bias=True)), + ("relu", QuickGELU()), + ("linear2", nn.Linear(vis_dim // 4, 4*ctx_dim,bias=True)) + ])) + if cfg.TRAINER.TCP.PREC == "fp16": + self.meta_net.half() + classnames = [name.replace("_", " ") for name in classnames] + temp = CUSTOM_TEMPLATES[cfg.DATASET.NAME] + prompts = [temp.format(c.replace("_", " ")) for c in classnames] + print(prompts) + + tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) + with torch.no_grad(): + embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) + + self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx :, :]) # CLS, EOS + self.n_cls = n_cls + self.n_ctx = n_ctx + self.tokenized_prompts = tokenized_prompts # torch.Tensor + self.prev_ctx=None + + def forward(self): + class_feature = self.meta_net(self.text_features) + class_feature = class_feature.reshape(class_feature.shape[0],-1,512) + prefix = self.token_prefix + suffix = self.token_suffix + ctx = self.ctx + ctx = ctx.unsqueeze(0).expand(self.n_cls, -1, -1) + prompt = torch.cat( + [ + prefix, # (n_cls, 1, dim) + ctx, + suffix, # (n_cls, *, dim) + ], + dim=1, + ) + return prompt, class_feature + + + +class CustomCLIP(nn.Module): + def __init__(self, cfg, classnames, clip_model): + super().__init__() + self.prompt_learner = PromptLearner(cfg, classnames, clip_model) + self.tokenized_prompts = self.prompt_learner.tokenized_prompts + self.ori_embedding = self.prompt_learner.text_features + self.image_encoder = clip_model.visual + self.text_encoder = TextEncoder(clip_model) + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + self.weight = cfg.TRAINER.TCP.W + + def forward(self, image, label=None): + image_features = self.image_encoder(image.type(self.dtype)) + text_features_old = self.ori_embedding + cos = torch.nn.CosineSimilarity(dim=1,eps=1e-07) + text_features_old = text_features_old / text_features_old.norm(dim=-1, keepdim=True) + tokenized_prompts = self.tokenized_prompts + logit_scale = self.logit_scale.exp() + + prompts,class_prompt = self.prompt_learner() + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = self.text_encoder(prompts, class_prompt, self.weight,tokenized_prompts.detach()) + text_features_norm = text_features / text_features.norm(dim=-1, keepdim=True) + logits = logit_scale.detach() * image_features.detach() @ text_features_norm.t() + + if self.prompt_learner.training: + score= 
cos(text_features_norm,text_features_old) + score = 1.0-torch.mean(score) + loss = F.cross_entropy(logits, label)+8.0*score + return logits, loss + else: + return logits + + +@TRAINER_REGISTRY.register() +class TCP(TrainerX): + + def check_cfg(self, cfg): + assert cfg.TRAINER.TCP.PREC in ["fp16", "fp32", "amp"] + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + print(classnames) + self.n_cls = len(classnames) + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + + if cfg.TRAINER.TCP.PREC == "fp32" or cfg.TRAINER.TCP.PREC == "amp": + # CLIP's default precision is fp16 + clip_model.float() + + print("Building custom CLIP") + self.model = CustomCLIP(cfg, classnames, clip_model) + self.w = cfg.TRAINER.TCP.W + + print("Turning off gradients in both the image and the text encoder") + + name_to_update = "prompt_learner" + + for name, param in self.model.named_parameters(): + if name_to_update not in name: + param.requires_grad_(False) + else: + print(name) + + + if cfg.MODEL.INIT_WEIGHTS: + load_pretrained_weights(self.model.prompt_learner, cfg.MODEL.INIT_WEIGHTS) + + self.model.to(self.device) + # NOTE: only give prompt_learner to the optimizer + self.optim = build_optimizer(self.model.prompt_learner, cfg.OPTIM) + self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) + self.register_model("prompt_learner", self.model.prompt_learner, self.optim, self.sched) + + self.scaler = GradScaler() if cfg.TRAINER.TCP.PREC == "amp" else None + + # Note that multi-gpu training could be slow because CLIP's size is + # big, which slows down the copy operation in DataParallel + device_count = torch.cuda.device_count() + if device_count > 1: + print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!") + self.model = nn.DataParallel(self.model) + self.proto=None + + def forward_backward(self, batch): + image, label = self.parse_batch_train(batch) + prec = self.cfg.TRAINER.TCP.PREC + if prec == "amp": + with autocast(): + output = self.model(image) + loss = F.cross_entropy(output, label) + self.optim.zero_grad() + self.scaler.scale(loss).backward() + self.scaler.step(self.optim) + self.scaler.update() + else: + output,loss = self.model(image, label) + self.model_backward_and_update(loss) + + loss_summary = { + "loss": loss.item(), + "acc": compute_accuracy(output, label)[0].item(), + } + if (self.batch_idx + 1) == self.num_batches: + self.update_lr() + return loss_summary + + def parse_batch_train(self, batch): + input = batch["img"] + label = batch["label"] + input = input.to(self.device) + label = label.to(self.device) + return input, label + + #def model_inference(self, input): + # return self.model(input) + + + def load_model(self, directory, epoch=None): + if not directory: + print("Note that load_model() is skipped as no pretrained model is given") + return + + names = self.get_model_names() + print(names) + + # By default, the best model is loaded + model_file = "model-best.pth.tar" + + if epoch is not None: + model_file = "model.pth.tar-" + str(epoch) + + for name in names: + model_path = osp.join(directory, name, model_file) + + if not osp.exists(model_path): + raise FileNotFoundError('Model not found at "{}"'.format(model_path)) + + checkpoint = load_checkpoint(model_path) + state_dict = checkpoint["state_dict"] + epoch = checkpoint["epoch"] + + # Ignore fixed token vectors + if "token_prefix" in state_dict: + del state_dict["token_prefix"] + + if "token_suffix" in state_dict: + del 
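The TCP objective in `forward()` above is cross-entropy plus a penalty that keeps the learned class text features close, in cosine similarity, to the frozen CLIP text features (hard-coded weight 8.0). A compact sketch with hypothetical names:

```
import torch
import torch.nn.functional as F

def tcp_loss(logits, label, text_features, frozen_text_features, align_weight=8.0):
    # Penalise low cosine similarity between learned and frozen class embeddings.
    text_features = F.normalize(text_features, dim=-1)
    frozen_text_features = F.normalize(frozen_text_features, dim=-1)
    score = F.cosine_similarity(text_features, frozen_text_features, dim=1)
    return F.cross_entropy(logits, label) + align_weight * (1.0 - score.mean())

# toy usage: 4 images, 10 classes, 512-d text features
loss = tcp_loss(torch.randn(4, 10), torch.randint(0, 10, (4,)),
                torch.randn(10, 512), torch.randn(10, 512))
```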
state_dict["token_suffix"] + + if "token_midfix" in state_dict: + del state_dict["token_midfix"] + print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) + # set strict=False + self._models[name].load_state_dict(state_dict, strict=False) diff --git a/trainers/zsclip.py b/trainers/zsclip.py new file mode 100644 index 0000000000000000000000000000000000000000..6474b753a5c50ab62125e8c8a7567bf4c53bad92 --- /dev/null +++ b/trainers/zsclip.py @@ -0,0 +1,118 @@ +import torch +import torch.nn as nn + +from dassl.engine import TRAINER_REGISTRY, TrainerX +from dassl.optim import build_optimizer, build_lr_scheduler + +from clip import clip +from clip.model import convert_weights + + +from .imagenet_templates import IMAGENET_TEMPLATES, IMAGENET_TEMPLATES_SELECT + +CUSTOM_TEMPLATES = { + "OxfordPets": "a photo of a {}, a type of pet.", + "OxfordFlowers": "a photo of a {}, a type of flower.", + "FGVCAircraft": "a photo of a {}, a type of aircraft.", + "DescribableTextures": "{} texture.", + "EuroSAT": "a centered satellite photo of {}.", + "StanfordCars": "a photo of a {}.", + "Food101": "a photo of {}, a type of food.", + "SUN397": "a photo of a {}.", + "Caltech101": "a photo of a {}.", + "UCF101": "a photo of a person doing {}.", + "ImageNet": "a photo of a {}.", + "ImageNetSketch": "a photo of a {}.", + "ImageNetV2": "a photo of a {}.", + "ImageNetA": "a photo of a {}.", + "ImageNetR": "a photo of a {}.", +} + +def load_clip_to_cpu(cfg): + backbone_name = cfg.MODEL.BACKBONE.NAME + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + + design_details = {"trainer": 'IVLP', + "vision_depth": 0, + "language_depth": 0, "vision_ctx": 0, + "language_ctx": 0} + model = clip.build_model(state_dict or model.state_dict(), design_details) + return model + +@TRAINER_REGISTRY.register() +class ZeroshotCLIP(TrainerX): + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + clip_model.to(self.device) + + temp = CUSTOM_TEMPLATES[cfg.DATASET.NAME] + prompts = [temp.format(c.replace("_", " ")) for c in classnames] + print(f"Prompts: {prompts}") + prompts = torch.cat([clip.tokenize(p) for p in prompts]) + prompts = prompts.to(self.device) + + with torch.no_grad(): + text_features = clip_model.encode_text(prompts) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + self.text_features = text_features + self.clip_model = clip_model + + def model_inference(self, image): + image_features = self.clip_model.encode_image(image) + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + logit_scale = self.clip_model.logit_scale.exp() + logits = logit_scale * image_features @ self.text_features.t() + return logits + + +@TRAINER_REGISTRY.register() +class ZeroshotCLIP2(ZeroshotCLIP): + """Prompt ensembling.""" + + # templates = IMAGENET_TEMPLATES + templates = IMAGENET_TEMPLATES_SELECT + + def build_model(self): + cfg = self.cfg + classnames = self.dm.dataset.classnames + + print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") + clip_model = load_clip_to_cpu(cfg) + clip_model.to(self.device) + + for params in clip_model.parameters(): + params.requires_grad_(False) + + # add custom-made prompt + if 
cfg.DATASET.NAME != "ImageNet": + self.templates += [CUSTOM_TEMPLATES[cfg.DATASET.NAME]] + + num_temp = len(self.templates) + print(f"Prompt ensembling (n={num_temp})") + + mean_text_features = 0 + for i, temp in enumerate(self.templates): + prompts = [temp.format(c.replace("_", " ")) for c in classnames] + prompts = torch.cat([clip.tokenize(p) for p in prompts]).to(self.device) + text_features = clip_model.encode_text(prompts) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + mean_text_features = mean_text_features + text_features + mean_text_features = mean_text_features / num_temp + mean_text_features = mean_text_features / mean_text_features.norm(dim=-1, keepdim=True) + + self.text_features = mean_text_features + self.clip_model = clip_model \ No newline at end of file