Update README.md
Browse files
README.md
CHANGED
|
@@ -2,11 +2,11 @@
|
|
| 2 |
library_name: sentence-transformers
|
| 3 |
pipeline_tag: sentence-similarity
|
| 4 |
tags:
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
model-index:
|
| 11 |
- name: epoch_0_model
|
| 12 |
results:
|
|
@@ -882,7 +882,7 @@ model-index:
|
|
| 882 |
- type: precision_at_1
|
| 883 |
value: 20.096
|
| 884 |
- type: precision_at_10
|
| 885 |
-
value: 5
|
| 886 |
- type: precision_at_100
|
| 887 |
value: 0.8750000000000001
|
| 888 |
- type: precision_at_1000
|
|
@@ -1201,7 +1201,7 @@ model-index:
|
|
| 1201 |
- type: map_at_5
|
| 1202 |
value: 16.448999999999998
|
| 1203 |
- type: mrr_at_1
|
| 1204 |
-
value: 71
|
| 1205 |
- type: mrr_at_10
|
| 1206 |
value: 77.68599999999999
|
| 1207 |
- type: mrr_at_100
|
|
@@ -1225,7 +1225,7 @@ model-index:
|
|
| 1225 |
- type: ndcg_at_5
|
| 1226 |
value: 46.317
|
| 1227 |
- type: precision_at_1
|
| 1228 |
-
value: 71
|
| 1229 |
- type: precision_at_10
|
| 1230 |
value: 34.4
|
| 1231 |
- type: precision_at_100
|
|
@@ -1370,7 +1370,7 @@ model-index:
|
|
| 1370 |
- type: ndcg_at_100
|
| 1371 |
value: 43.832
|
| 1372 |
- type: ndcg_at_1000
|
| 1373 |
-
value: 47
|
| 1374 |
- type: ndcg_at_3
|
| 1375 |
value: 33.694
|
| 1376 |
- type: ndcg_at_5
|
|
@@ -2337,19 +2337,19 @@ model-index:
|
|
| 2337 |
- type: map_at_5
|
| 2338 |
value: 1.185
|
| 2339 |
- type: mrr_at_1
|
| 2340 |
-
value: 94
|
| 2341 |
- type: mrr_at_10
|
| 2342 |
-
value: 97
|
| 2343 |
- type: mrr_at_100
|
| 2344 |
-
value: 97
|
| 2345 |
- type: mrr_at_1000
|
| 2346 |
-
value: 97
|
| 2347 |
- type: mrr_at_3
|
| 2348 |
-
value: 97
|
| 2349 |
- type: mrr_at_5
|
| 2350 |
-
value: 97
|
| 2351 |
- type: ndcg_at_1
|
| 2352 |
-
value: 89
|
| 2353 |
- type: ndcg_at_10
|
| 2354 |
value: 82.30499999999999
|
| 2355 |
- type: ndcg_at_100
|
|
@@ -2361,17 +2361,17 @@ model-index:
|
|
| 2361 |
- type: ndcg_at_5
|
| 2362 |
value: 86.05199999999999
|
| 2363 |
- type: precision_at_1
|
| 2364 |
-
value: 94
|
| 2365 |
- type: precision_at_10
|
| 2366 |
-
value: 87
|
| 2367 |
- type: precision_at_100
|
| 2368 |
value: 63.38
|
| 2369 |
- type: precision_at_1000
|
| 2370 |
value: 23.498
|
| 2371 |
- type: precision_at_3
|
| 2372 |
-
value: 94
|
| 2373 |
- type: precision_at_5
|
| 2374 |
-
value: 92
|
| 2375 |
- type: recall_at_1
|
| 2376 |
value: 0.242
|
| 2377 |
- type: recall_at_10
|
|
@@ -2602,20 +2602,32 @@ model-index:
|
|
| 2602 |
value: 86.65166462876266
|
| 2603 |
- type: max_f1
|
| 2604 |
value: 79.07408783532733
|
|
|
|
| 2605 |
---
|
| 2606 |
|
| 2607 |
-
# nomic-embed-text-v1:
|
| 2608 |
|
| 2609 |
-
`nomic-embed-text-v1` is
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2610 |
|
| 2611 |
|
| 2612 |
|
| 2613 |
-
| Name | SeqLen | MTEB | LoCo | Jina Long Context | Open Weights | Open Training Code | Open Data |
|
| 2614 |
-
| :-------------------------------:| :----- | :-------- | :------: | :---------------: | :-----------: | :----------------: | :---------- |
|
| 2615 |
-
| nomic-embed-text-v1 | 8192 | **62.39** |**85.53** | 54.16 | ✅ | ✅ | ✅ |
|
| 2616 |
-
| jina-embeddings-v2-base-en | 8192 | 60.39 | 85.45 | 51.90 | ✅ | ❌ | ❌ |
|
| 2617 |
-
| text-embedding-3-small | 8191 | 62.26 | 82.40 | **58.20** | ❌ | ❌ | ❌ |
|
| 2618 |
-
| text-embedding-ada-002 | 8191 | 60.99 | 52.7 | 55.25 | ❌ | ❌ | ❌ |
|
| 2619 |
|
| 2620 |
|
| 2621 |
## Hosted Inference API
|
|
|
|
| 2 |
library_name: sentence-transformers
|
| 3 |
pipeline_tag: sentence-similarity
|
| 4 |
tags:
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- sentence-similarity
|
| 7 |
+
- mteb
|
| 8 |
+
- transformers
|
| 9 |
+
- transformers.js
|
| 10 |
model-index:
|
| 11 |
- name: epoch_0_model
|
| 12 |
results:
|
|
|
|
| 882 |
- type: precision_at_1
|
| 883 |
value: 20.096
|
| 884 |
- type: precision_at_10
|
| 885 |
+
value: 5
|
| 886 |
- type: precision_at_100
|
| 887 |
value: 0.8750000000000001
|
| 888 |
- type: precision_at_1000
|
|
|
|
| 1201 |
- type: map_at_5
|
| 1202 |
value: 16.448999999999998
|
| 1203 |
- type: mrr_at_1
|
| 1204 |
+
value: 71
|
| 1205 |
- type: mrr_at_10
|
| 1206 |
value: 77.68599999999999
|
| 1207 |
- type: mrr_at_100
|
|
|
|
| 1225 |
- type: ndcg_at_5
|
| 1226 |
value: 46.317
|
| 1227 |
- type: precision_at_1
|
| 1228 |
+
value: 71
|
| 1229 |
- type: precision_at_10
|
| 1230 |
value: 34.4
|
| 1231 |
- type: precision_at_100
|
|
|
|
| 1370 |
- type: ndcg_at_100
|
| 1371 |
value: 43.832
|
| 1372 |
- type: ndcg_at_1000
|
| 1373 |
+
value: 47
|
| 1374 |
- type: ndcg_at_3
|
| 1375 |
value: 33.694
|
| 1376 |
- type: ndcg_at_5
|
|
|
|
| 2337 |
- type: map_at_5
|
| 2338 |
value: 1.185
|
| 2339 |
- type: mrr_at_1
|
| 2340 |
+
value: 94
|
| 2341 |
- type: mrr_at_10
|
| 2342 |
+
value: 97
|
| 2343 |
- type: mrr_at_100
|
| 2344 |
+
value: 97
|
| 2345 |
- type: mrr_at_1000
|
| 2346 |
+
value: 97
|
| 2347 |
- type: mrr_at_3
|
| 2348 |
+
value: 97
|
| 2349 |
- type: mrr_at_5
|
| 2350 |
+
value: 97
|
| 2351 |
- type: ndcg_at_1
|
| 2352 |
+
value: 89
|
| 2353 |
- type: ndcg_at_10
|
| 2354 |
value: 82.30499999999999
|
| 2355 |
- type: ndcg_at_100
|
|
|
|
| 2361 |
- type: ndcg_at_5
|
| 2362 |
value: 86.05199999999999
|
| 2363 |
- type: precision_at_1
|
| 2364 |
+
value: 94
|
| 2365 |
- type: precision_at_10
|
| 2366 |
+
value: 87
|
| 2367 |
- type: precision_at_100
|
| 2368 |
value: 63.38
|
| 2369 |
- type: precision_at_1000
|
| 2370 |
value: 23.498
|
| 2371 |
- type: precision_at_3
|
| 2372 |
+
value: 94
|
| 2373 |
- type: precision_at_5
|
| 2374 |
+
value: 92
|
| 2375 |
- type: recall_at_1
|
| 2376 |
value: 0.242
|
| 2377 |
- type: recall_at_10
|
|
|
|
| 2602 |
value: 86.65166462876266
|
| 2603 |
- type: max_f1
|
| 2604 |
value: 79.07408783532733
|
| 2605 |
+
license: apache-2.0
|
| 2606 |
---
|
| 2607 |
|
| 2608 |
+
# nomic-embed-text-v1.5: Resizable Production Embeddings with Matryoshka Representation Learning
|
| 2609 |
|
| 2610 |
+
`nomic-embed-text-v1.5` is an improvement upon [Nomic Embed](https://huggingface.co/nomic-ai/nomic-embed-text-v1) that utilizes [Matryoshka Representation Learning](https://arxiv.org/abs/2205.13147), which gives developers the flexibility to reduce the embedding size at the cost of only a negligible reduction in performance.
|
| 2611 |
+
|
| 2612 |
+
|
| 2613 |
+
|
| 2614 |
+
| Name | SeqLen | Dimension | MTEB |
|
| 2615 |
+
| :-------------------------------:| :----- | :-------- | :------: |
|
| 2616 |
+
| nomic-embed-text-v1 | 8192 | 768 | **62.39** |
|
| 2617 |
+
| nomic-embed-text-v1.5 | 8192 | 768 | 62.28 |
|
| 2618 |
+
| nomic-embed-text-v1.5 | 8192 | 512 | 61.96 |
|
| 2619 |
+
| nomic-embed-text-v1.5 | 8192 | 256 | 61.04 |
|
| 2620 |
+
| nomic-embed-text-v1.5 | 8192 | 128 | 59.34 |
|
| 2621 |
+
| nomic-embed-text-v1.5 | 8192 | 64 | 56.10 |
|
| 2622 |
+
| text-embedding-3-small | 8192 | 1536 | 62.26 |
|
| 2623 |
+
| text-embedding-3-small | 8192 | 512 | 61.60 |
|
| 2624 |
+
|
| 2625 |
+
|
| 2626 |
+
|
| 2627 |
+

|
| 2628 |
|
| 2629 |
|
| 2630 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2631 |
|
| 2632 |
|
| 2633 |
## Hosted Inference API
|