Source code for lightning_ir.data.external_datasets.colbert
1from .ir_datasets_utils import ScoredDocTuples, register_new_dataset
2
3
[docs]
def register_colbert_docpairs():
    """Register the ``msmarco-passage/train/colbert-docpairs`` dataset.

    Builds a download descriptor (source URL, expected MD5 checksum and
    cache path) for the ColBERTv2 64-way ``examples.json`` file hosted on
    Hugging Face and hands it to ``register_new_dataset`` as the
    ``docpairs`` component. Documents, queries and qrels are referenced by
    the ids of the MS MARCO passage datasets, and ``ScoredDocTuples`` is
    passed as the docpairs type.

    NOTE(review): how ``register_new_dataset`` resolves these ids and when
    the file is actually downloaded is defined in ``ir_datasets_utils`` --
    confirm there.
    """
    # Queries and qrels both point at the same split id.
    train_split = "msmarco-passage/train"

    # Descriptor of the remote file; key order matches the original literal.
    colbert_download = dict(
        url="https://huggingface.co/colbert-ir/colbertv2.0_msmarco_64way/resolve/main/examples.json?download=true",
        expected_md5="8be0c71e330ac54dcd77fba058d291c7",
        cache_path="msmarco-passage/train/colbert_64way.json",
    )

    register_new_dataset(
        "msmarco-passage/train/colbert-docpairs",
        docs="msmarco-passage",
        queries=train_split,
        qrels=train_split,
        docpairs=colbert_download,
        DocpairsType=ScoredDocTuples,
    )