Spaces:
Sleeping
Sleeping
first commit
Browse files
- execution_accuracy.py +25 -6
- requirements.txt +2 -1
execution_accuracy.py
CHANGED
|
@@ -15,7 +15,7 @@
|
|
| 15 |
|
| 16 |
import evaluate
|
| 17 |
import datasets
|
| 18 |
-
|
| 19 |
|
| 20 |
# TODO: Add BibTeX citation
|
| 21 |
_CITATION = """\
|
|
@@ -71,8 +71,9 @@ class ExecutionAccuracy(evaluate.Metric):
|
|
| 71 |
inputs_description=_KWARGS_DESCRIPTION,
|
| 72 |
# This defines the format of each prediction and reference
|
| 73 |
features=datasets.Features({
|
| 74 |
-
'predictions': datasets.Value('
|
| 75 |
-
'references': datasets.Value('
|
|
|
|
| 76 |
}),
|
| 77 |
# Homepage of the module for documentation
|
| 78 |
homepage="http://module.homepage",
|
|
@@ -86,10 +87,28 @@ class ExecutionAccuracy(evaluate.Metric):
|
|
| 86 |
# TODO: Download external resources if needed
|
| 87 |
pass
|
| 88 |
|
| 89 |
-
def _compute(self, predictions, references):
|
| 90 |
"""Returns the scores"""
|
| 91 |
# TODO: Compute the different scores of the module
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
return {
|
| 94 |
-
"accuracy": accuracy,
|
| 95 |
}
|
|
|
|
| 15 |
|
| 16 |
import evaluate
|
| 17 |
import datasets
|
| 18 |
+
from records import Database
|
| 19 |
|
| 20 |
# TODO: Add BibTeX citation
|
| 21 |
_CITATION = """\
|
|
|
|
| 71 |
inputs_description=_KWARGS_DESCRIPTION,
|
| 72 |
# This defines the format of each prediction and reference
|
| 73 |
features=datasets.Features({
|
| 74 |
+
'predictions': datasets.Value('string'),
|
| 75 |
+
'references': datasets.Value('string'),
|
| 76 |
+
'db_urls': datasets.Value('string'),
|
| 77 |
}),
|
| 78 |
# Homepage of the module for documentation
|
| 79 |
homepage="http://module.homepage",
|
|
|
|
| 87 |
# TODO: Download external resources if needed
|
| 88 |
pass
|
| 89 |
|
| 90 |
+
def _compute(self, predictions, references, db_urls):
    """Return the execution accuracy of predicted SQL queries.

    Each prediction is executed against the database named by the matching
    entry of ``db_urls`` and its result rows are compared, ignoring row
    order but respecting duplicates, with the rows produced by the
    reference query on the same database.

    Args:
        predictions: Predicted SQL query strings, one per example.
        references: Reference SQL query strings, aligned with
            ``predictions``.
        db_urls: Database URLs accepted by ``records.Database``, aligned
            with ``predictions``.

    Returns:
        A dict with the single key ``"execution accuracy"`` holding the
        fraction of examples whose prediction returned the same rows as
        the reference. Empty input yields ``0.0`` rather than dividing by
        zero.

    Notes:
        A query that fails to execute never counts as a match, so a broken
        prediction is not considered equal to an empty reference result.
        Row values are assumed to be hashable (true for the usual scalar
        SQL types); an unhashable value raises ``TypeError``.
    """
    # Local import keeps this method self-contained; the module's import
    # block does not provide Counter.
    from collections import Counter

    def run_query(db, query):
        """Execute ``query`` and return its rows as a multiset, or None on failure."""
        try:
            rows = db.query(query).as_dict()
        except Exception:
            # Model-generated SQL is expected to be invalid some of the
            # time; any database error simply marks the query as failed.
            return None
        # A multiset of value tuples makes the comparison independent of
        # row order without relying on hash-based sorting.
        return Counter(tuple(row.values()) for row in rows)

    total = len(predictions)
    if total == 0:
        return {
            "execution accuracy": 0.0,
        }

    # One connection pool per distinct URL instead of one per example.
    databases = {}
    correct = 0
    try:
        for prediction, reference, db_url in zip(predictions, references, db_urls):
            db = databases.get(db_url)
            if db is None:
                db = databases[db_url] = Database(db_url)
            # Execute the single query of this example, not the whole list.
            pred_rows = run_query(db, prediction)
            ref_rows = run_query(db, reference)
            if pred_rows is not None and pred_rows == ref_rows:
                correct += 1
    finally:
        # Release every database handle even if an example raised.
        for db in databases.values():
            db.close()

    accuracy = correct / total
    return {
        "execution accuracy": accuracy,
    }
|
requirements.txt
CHANGED
|
@@ -1 +1,2 @@
|
|
| 1 |
-
git+https://github.com/huggingface/evaluate@main
|
|
|
|
|
|
| 1 |
+
git+https://github.com/huggingface/evaluate@main
|
| 2 |
+
records
|