Update contamination_report.csv
Browse files- contamination_report.csv +8 -1
contamination_report.csv
CHANGED
|
@@ -467,13 +467,20 @@ RadNLI;;GPT-3.5;model;0.0;0.0;0.0;model-based;https://arxiv.org/pdf/2308.08493;8
|
|
| 467 |
cais/mmlu;;GPT-3.5;model;;;52.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 468 |
winogrande;;GPT-3.5;model;;;9.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 469 |
truthful_qa;;GPT-3.5;model;;;12.0;model-based;https://arxiv.org/abs/2311.09783;10
|
|
|
|
| 470 |
|
| 471 |
cais/mmlu;;GPT-4;model;;;57.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 472 |
truthful_qa;;GPT-4;model;;;10.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 473 |
winogrande;;GPT-4;model;;;12.0;model-based;https://arxiv.org/abs/2311.09783;10
|
|
|
|
|
|
|
|
|
|
| 474 |
|
| 475 |
allenai/openbookqa;;LLaMa 2-13B;model;;;4.0;model-based;https://arxiv.org/abs/2311.09783;10
|
|
|
|
|
|
|
| 476 |
|
| 477 |
truthful_qa;;Mistral-7B;model;;;15.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 478 |
allenai/openbookqa;;Mistral-7B;model;;;10.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 479 |
-
winogrande;;Mistral-7B;model;;;3.0;model-based;https://arxiv.org/abs/2311.09783;10
|
|
|
|
|
|
| 467 |
cais/mmlu;;GPT-3.5;model;;;52.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 468 |
winogrande;;GPT-3.5;model;;;9.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 469 |
truthful_qa;;GPT-3.5;model;;;12.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 470 |
+
allenai/openbookqa;;GPT-3.5;model;;;1.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 471 |
|
| 472 |
cais/mmlu;;GPT-4;model;;;57.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 473 |
truthful_qa;;GPT-4;model;;;10.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 474 |
winogrande;;GPT-4;model;;;12.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 475 |
+
allenai/openbookqa;;GPT-4;model;;;1.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 476 |
+
Rowan/hellaswag;;GPT-4;model;;;2.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 477 |
+
|
| 478 |
|
| 479 |
allenai/openbookqa;;LLaMa 2-13B;model;;;4.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 480 |
+
truthful_qa;;LLaMa 2-13B;model;;;2.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 481 |
+
winogrande;;LLaMa 2-13B;model;;;1.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 482 |
|
| 483 |
truthful_qa;;Mistral-7B;model;;;15.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 484 |
allenai/openbookqa;;Mistral-7B;model;;;10.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 485 |
+
winogrande;;Mistral-7B;model;;;3.0;model-based;https://arxiv.org/abs/2311.09783;10
|
| 486 |
+
cais/mmlu;;Mistral-7B;model;;;1.0;model-based;https://arxiv.org/abs/2311.09783;10
|