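"""Auto-wrapper factories for colpali_engine models.

Calling `AutoProcessorWrapper`, `AutoTokenizerWrapper`, or
`AutoColModelWrapper` returns the concrete Hugging Face processor,
tokenizer, or retrieval model resolved from the checkpoint name (and,
for models, the `training_objective` keyword), mirroring the
`transformers.Auto*` factories.
"""
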
import importlib.util

from colpali_engine.models.clip_baselines import ColSigLIP, SigLIP
from colpali_engine.models.colbert_architectures import (
    BiBERT,
    BiXLMRoBERTa,
    ColBERT,
    ColCamembert,
    ColLlama,
    ColXLMRoBERTa,
)
from colpali_engine.models.idefics_colbert_architecture import BiIdefics, ColIdefics
from colpali_engine.models.paligemma_colbert_architecture import (
    BiNewSiglip,
    BiPaliLast,
    BiPaliMean,
    ColNewSiglip,
    ColPali,
)

if importlib.util.find_spec("transformers") is not None:
    from transformers import AutoProcessor, AutoTokenizer
    from transformers.tokenization_utils import PreTrainedTokenizer

    class AutoProcessorWrapper:
        def __new__(cls, *args, **kwargs):
            return AutoProcessor.from_pretrained(*args, **kwargs)

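    # Note: this presumably inherits from PreTrainedTokenizer only so the
    # wrapper is accepted where a tokenizer type is expected; `__new__`
    # returns whatever concrete tokenizer AutoTokenizer resolves to, not an
    # instance of this class (so `__init__` is never invoked).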
    class AutoTokenizerWrapper(PreTrainedTokenizer):
        def __new__(cls, *args, **kwargs):
            return AutoTokenizer.from_pretrained(*args, **kwargs)

    class AutoColModelWrapper:
        def __new__(cls, *args, **kwargs):
            pretrained_model_name_or_path = None
            if args:
                pretrained_model_name_or_path = args[0]
            elif "pretrained_model_name_or_path" in kwargs:
                pretrained_model_name_or_path = kwargs["pretrained_model_name_or_path"]
            if pretrained_model_name_or_path is None:
                raise ValueError(
                    "A checkpoint name or path must be given, either positionally or as `pretrained_model_name_or_path`"
                )

            training_objective = kwargs.pop("training_objective", "colbertv1")

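            # Dispatch on substrings of the checkpoint name; `training_objective`
            # picks between late-interaction ("colbertv1*") heads and pooled
            # bi-encoder ("biencoder*") heads where a backbone supports both.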
            if "camembert" in pretrained_model_name_or_path:
                return ColCamembert.from_pretrained(*args, **kwargs)
            elif "xlm-roberta" in pretrained_model_name_or_path:
                if training_objective == "biencoder":
                    return BiXLMRoBERTa.from_pretrained(*args, **kwargs)
                return ColXLMRoBERTa.from_pretrained(*args, **kwargs)
            elif (
                "llama" in pretrained_model_name_or_path.lower() or "croissant" in pretrained_model_name_or_path.lower()
            ):
                return ColLlama.from_pretrained(*args, **kwargs)
            elif "idefics2" in pretrained_model_name_or_path:
                if training_objective == "biencoder":
                    return BiIdefics.from_pretrained(*args, **kwargs)
                return ColIdefics.from_pretrained(*args, **kwargs)
            elif "siglip" in pretrained_model_name_or_path:
                if training_objective == "biencoder_mean":
                    return SigLIP.from_pretrained(*args, **kwargs)
                elif training_objective == "colbertv1":
                    return ColSigLIP.from_pretrained(*args, **kwargs)
                else:
                    raise ValueError(f"Training objective {training_objective} not recognized")
            elif "paligemma" in pretrained_model_name_or_path:
                if training_objective == "biencoder_mean":
                    return BiPaliMean.from_pretrained(*args, **kwargs)
                elif training_objective == "biencoder_last":
                    return BiPaliLast.from_pretrained(*args, **kwargs)
                elif training_objective == "biencoder_mean_vision":
                    return BiNewSiglip.from_pretrained(*args, **kwargs)
                elif training_objective == "colbertv1_vision":
                    return ColNewSiglip.from_pretrained(*args, **kwargs)
                elif training_objective == "colbertv1":
                    return ColPali.from_pretrained(*args, **kwargs)
                else:
                    raise ValueError(f"Training objective {training_objective} not recognized")
            else:
                if training_objective == "biencoder":
                    return BiBERT.from_pretrained(*args, **kwargs)
                return ColBERT.from_pretrained(*args, **kwargs)

else:
    raise ModuleNotFoundError(
        "The `transformers` package is required by colpali_engine.models; install it with `pip install transformers`."
    )
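
if __name__ == "__main__":
    # Minimal usage sketch (illustrative, not part of the library's API surface):
    # dispatch is keyed on checkpoint-name substrings, so a "paligemma"
    # checkpoint with the default "colbertv1" objective resolves to ColPali.
    # The checkpoint name below is an assumption for illustration; any
    # PaliGemma-style checkpoint you have access to should route the same way.
    model = AutoColModelWrapper(
        "google/paligemma-3b-mix-448", training_objective="colbertv1"
    )
    processor = AutoProcessorWrapper("google/paligemma-3b-mix-448")
    print(type(model).__name__)  # expected: "ColPali"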