Working with evaluation tables
First we’ll fetch a task, two datasets, and a paper so we have something to work with:
>>> from paperswithcode import PapersWithCodeClient, models
>>> client = PapersWithCodeClient(token='YOUR TOKEN')
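# Tip: the token can also be read from the environment instead of being
# hard-coded (PAPERSWITHCODE_TOKEN is just an illustrative variable name):
>>> import os
>>> client = PapersWithCodeClient(token=os.environ["PAPERSWITHCODE_TOKEN"])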
# Task
>>> task = client.task_list().results[0]
>>> task.id
'triple-classification'
# Datasets
>>> datasets = client.dataset_list()
>>> dataset_1 = datasets.results[0]
>>> dataset_2 = datasets.results[1]
>>> dataset_1.id, dataset_2.id
('rewrite', 'leitner-et-al-2020')
# Paper
>>> paper = client.paper_list().results[0]
>>> paper.id
'on-the-minimal-teaching-sets-of-two'
# We can now create and manipulate evaluation tables:
>>> et = client.evaluation_create(
... models.EvaluationTableCreateRequest(
... task=task.id,
... dataset=dataset_1.id
... )
... )
>>> et.id
'triple-classification-on-rewrite-8'
# Get by id:
>>> client.evaluation_get(et.id)
EvaluationTable(
id='triple-classification-on-rewrite-8',
task='triple-classification',
dataset='rewrite'
)
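# Existing evaluation tables can also be browsed via the paginated list call
# (assuming an evaluation_list method analogous to task_list above; output omitted):
>>> evaluations = client.evaluation_list()
>>> evaluations.results[0].id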
# Update the evaluation table
>>> et = client.evaluation_update(et.id, models.EvaluationTableUpdateRequest(
... dataset=dataset_2.id
... ))
>>> et
EvaluationTable(
id='triple-classification-on-rewrite-8',
task='triple-classification',
dataset='leitner-et-al-2020'
)
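# Fetching the table again confirms the update was persisted:
>>> client.evaluation_get(et.id).dataset
'leitner-et-al-2020'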
# List metrics
>>> client.evaluation_metric_list(et.id)
[]
# Add a metric
>>> m = client.evaluation_metric_add(
... et.id,
... models.MetricCreateRequest(
... name="some metric",
... description="Metric description",
... is_loss=True
... )
... )
>>> m
Metric(
id='748dccd5-0a28-432c-98ec-7ab74a127ede',
name='some metric',
description='Metric description',
is_loss=True
)
# Update a metric
>>> m = client.evaluation_metric_update(
... et.id,
... m.id,
... models.MetricUpdateRequest(is_loss=False)
... )
>>> m
Metric(
id='748dccd5-0a28-432c-98ec-7ab74a127ede',
name='some metric',
description='Metric description',
is_loss=False
)
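# Listing the metrics again should now include the metric we just updated
# (output omitted; it mirrors the Metric shown above):
>>> client.evaluation_metric_list(et.id)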
# List results
>>> client.evaluation_result_list(et.id)
[]
# Add a result
>>> r = client.evaluation_result_add(
... et.id,
... models.ResultCreateRequest(
... metrics={"some metric": '44'},
... methodology="Some methodology",
... uses_additional_data=False,
... paper=paper.id # Optional
... )
... )
>>> r
Result(
id='81cdfce5-d976-4d29-93a7-23a29118b037',
best_rank=None,
metrics={'some metric': '44'},
methodology='Some methodology',
uses_additional_data=False,
paper='on-the-minimal-teaching-sets-of-two',
best_metric=None
)
# Update a result
>>> r = client.evaluation_result_update(
... et.id,
... r.id,
... models.ResultUpdateRequest(
... methodology="Some other methodology",
... uses_additional_data=True,
... )
... )
>>> r
Result(
id='81cdfce5-d976-4d29-93a7-23a29118b037',
best_rank=None,
metrics={'some metric': '44'},
methodology='Some other methodology',
uses_additional_data=True,
paper='on-the-minimal-teaching-sets-of-two',
best_metric=None
)
# List the results again
>>> client.evaluation_result_list(et.id)
[
Result(
id='81cdfce5-d976-4d29-93a7-23a29118b037',
best_rank=None,
metrics={'some metric': '44'},
methodology='Some other methodology',
uses_additional_data=True,
paper='on-the-minimal-teaching-sets-of-two',
best_metric=None
)
]
# Delete a result
>>> client.evaluation_result_delete(et.id, r.id)
>>> client.evaluation_result_list(et.id)
[]
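Finally, we can clean up after ourselves. The calls below assume the client also exposes evaluation_metric_delete and evaluation_delete, mirroring evaluation_result_delete above; this is a sketch under that assumption, so skip this step if your client version does not provide them:
# Delete the metric, then the evaluation table itself
>>> client.evaluation_metric_delete(et.id, m.id)
>>> client.evaluation_delete(et.id)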