Source code for lightning_ir.data.external_datasets.colbert

 1from .ir_datasets_utils import ScoredDocTuples, register_new_dataset
 2
 3
def register_colbert_docpairs():
    """Register the ``msmarco-passage/train/colbert-docpairs`` dataset.

    Builds a download descriptor for the ``examples.json`` file hosted in the
    ``colbert-ir/colbertv2.0_msmarco_64way`` Hugging Face repository and hands
    it to ``register_new_dataset`` as the ``docpairs`` component. Documents,
    queries and qrels are referenced by existing ``msmarco-passage`` dataset
    ids, and ``ScoredDocTuples`` is passed as the docpairs type.

    Takes no arguments and returns ``None``; the only visible effect is the
    call to ``register_new_dataset``.
    """
    dataset_id = "msmarco-passage/train/colbert-docpairs"

    # Descriptor of the remote docpairs file. How these keys are consumed is
    # up to register_new_dataset -- presumably the md5 is checked after
    # download and cache_path is relative to the dataset cache; TODO confirm.
    docpairs_source = dict(
        url="https://huggingface.co/colbert-ir/colbertv2.0_msmarco_64way/resolve/main/examples.json?download=true",
        expected_md5="8be0c71e330ac54dcd77fba058d291c7",
        cache_path="msmarco-passage/train/colbert_64way.json",
    )

    register_new_dataset(
        dataset_id,
        docs="msmarco-passage",
        queries="msmarco-passage/train",
        qrels="msmarco-passage/train",
        docpairs=docpairs_source,
        DocpairsType=ScoredDocTuples,
    )