Source code for lightning_ir.data.external_datasets.cross_architecture_knowledge_distillation

 1from .ir_datasets_utils import ScoredDocTuples, register_new_dataset
 2
 3
def register_kd_docpairs():
    """Register the ``msmarco-passage/train/kd-docpairs`` dataset.

    Builds a download descriptor (URL, expected MD5 checksum, and cache path)
    for a TSV file hosted on Zenodo and hands it to ``register_new_dataset``
    as the ``docpairs`` component, with ``ScoredDocTuples`` as the docpairs
    type. Documents, queries, and qrels are referenced by the dataset ids
    ``msmarco-passage`` and ``msmarco-passage/train``.

    NOTE(review): judging by the file name, the TSV presumably holds BERT
    cross-encoder ensemble scores for MS MARCO training pairs -- confirm
    against the Zenodo record before relying on that.
    """
    dataset_id = "msmarco-passage/train/kd-docpairs"
    train_split_id = "msmarco-passage/train"

    # Descriptor for the scored-docpairs file; key order kept as url,
    # expected_md5, cache_path.
    scores_download = dict(
        url="https://zenodo.org/record/4068216/files/bert_cat_ensemble_msmarcopassage_train_scores_ids.tsv?download=1",
        expected_md5="4d99696386f96a7f1631076bcc53ac3c",
        cache_path="msmarco-passage/train/bert_cat_ensemble_msmarcopassage_train_scores_ids.tsv",
    )

    register_new_dataset(
        dataset_id,
        docs="msmarco-passage",
        queries=train_split_id,
        qrels=train_split_id,
        docpairs=scores_download,
        DocpairsType=ScoredDocTuples,
    )