Source code for lightning_ir.data.external_datasets.cross_architecture_knowledge_distillation
1from .ir_datasets_utils import ScoredDocTuples, register_new_dataset
2
3
[docs]
def register_kd_docpairs() -> None:
    """Register the ``msmarco-passage/train/kd-docpairs`` dataset.

    The new dataset id is registered with ``msmarco-passage`` as its documents
    and ``msmarco-passage/train`` as both its queries and its qrels. Its
    docpairs are described by a descriptor holding a download URL, the expected
    MD5 checksum, and a cache path for the
    ``bert_cat_ensemble_msmarcopassage_train_scores_ids.tsv`` file, and
    ``ScoredDocTuples`` is passed as the docpairs type.

    NOTE(review): when the file is actually downloaded and how the checksum and
    cache path are used is decided inside ``register_new_dataset``, which is
    not visible here -- confirm against ``ir_datasets_utils``.
    """
    # Both the queries and the qrels come from the same existing dataset id.
    train_split = "msmarco-passage/train"

    # Descriptor of the remote score file; keys are consumed by
    # ``register_new_dataset`` (presumably as a download spec -- TODO confirm).
    scores_file = dict(
        url=(
            "https://zenodo.org/record/4068216/files/bert_cat_ensemble_msmarcopassage_train_scores_ids.tsv?download=1"
        ),
        expected_md5="4d99696386f96a7f1631076bcc53ac3c",
        cache_path="msmarco-passage/train/bert_cat_ensemble_msmarcopassage_train_scores_ids.tsv",
    )

    register_new_dataset(
        "msmarco-passage/train/kd-docpairs",
        docs="msmarco-passage",
        queries=train_split,
        qrels=train_split,
        docpairs=scores_file,
        DocpairsType=ScoredDocTuples,
    )