image-classifier / model_info.json
justin-onda's picture
model_info.json 현행화
553314a
{
"model_architecture": {
"backbone": "facebook/dinov2-large",
"backbone_details": {
"model_type": "Vision Transformer (ViT)",
"variant": "Large",
"patch_size": 14,
"num_hidden_layers": 24,
"num_attention_heads": 16,
"hidden_size": 1024,
"intermediate_size": 4096,
"pretrained_image_size": 518,
"finetuned_image_size": 224
},
"feature_dim": 1024,
"encoder_parameters": 304367634,
"head_trainable_parameters": 24598,
"freeze_backbone": true,
"encoder_output_shape": {
"description": "Encoder outputs full sequence of tokens including CLS token",
"raw_shape": ["batch_size", 257, 1024],
"tokens_breakdown": {
"cls_token": 1,
"patch_tokens": 256,
"total": 257
},
"usage": "CLS token (index 0) is extracted for feature representation"
}
},
"input_specification": {
"image_size": [
224,
224
],
"channels": 3,
"pixel_range": [
0.0,
1.0
],
"normalization": {
"mean": [
0.485,
0.456,
0.406
],
"std": [
0.229,
0.224,
0.225
],
"description": "ImageNet normalization for DINOv2"
},
"input_format": "RGB",
"tensor_layout": "NCHW"
},
"output_specification": {
"heads": {
"scene": {
"num_classes": 6,
"output_type": "logits",
"activation": "softmax",
"classes": [
16000001,
16000002,
16000006,
16000008,
16000009,
16000011
]
},
"concept": {
"num_classes": 3,
"output_type": "logits",
"activation": "softmax",
"classes": [
17000001,
17000002,
17000003
]
},
"object": {
"num_classes": 13,
"output_type": "logits",
"activation": "softmax",
"classes": [
18000001,
18000002,
18000004,
18000005,
18000006,
18000007,
18000008,
18000009,
18000010,
18000012,
18000014,
18000016,
"unclassified"
]
}
}
},
"class_mappings": {
"scene": {
"0": 16000001,
"1": 16000002,
"2": 16000006,
"3": 16000008,
"4": 16000009,
"5": 16000011
},
"concept": {
"0": 17000001,
"1": 17000002,
"2": 17000003
},
"object": {
"0": 18000001,
"1": 18000002,
"2": 18000004,
"3": 18000005,
"4": 18000006,
"5": 18000007,
"6": 18000008,
"7": 18000009,
"8": 18000010,
"9": 18000012,
"10": 18000014,
"11": 18000016,
"12": "unclassified"
}
}
}