---
license: mit
---
## Usage
### PyTorch version

Install the `llm2vec` package first with `pip install llm2vec`, then run the following:
```python
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
from torch import nn
from llm2vec import LLM2Vec
from transformers import AutoTokenizer, AutoModel, AutoConfig


class LLM2VecWrapper(LLM2Vec):
    def prepare_for_tokenization(self, text):
        # Wrap each caption in the Llama-3 chat template expected by the model.
        text = (
            "<|start_header_id|>user<|end_header_id|>\n\n"
            + text.strip()
            + "<|eot_id|>"
        )
        return text


class LlamaVec_1B_FeatureExtractor(nn.Module):
    def __init__(self):
        super().__init__()
        model_path = 'microsoft/LLM2CLIP-Llama-3.2-1B-Instruct-CC-Finetuned'
        config = AutoConfig.from_pretrained(model_path)
        model = AutoModel.from_pretrained(
            model_path, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"
        # Mean-pool token representations into one embedding per caption.
        self.l2v = LLM2VecWrapper(model, tokenizer, pooling_mode="mean", max_length=512, skip_instruction=True)

    def extract_features(self, text):
        with torch.amp.autocast('cuda'):
            reps_norm = self.l2v.encode(text)
            reps_norm = torch.nn.functional.normalize(reps_norm, p=2, dim=1)
        return {"embeds": reps_norm}


text_model = LlamaVec_1B_FeatureExtractor()
captions = ["this is a test"]
embeddings = text_model.extract_features(captions)
```
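Because `extract_features` returns L2-normalized embeddings, cosine similarity between captions reduces to a dot product. Below is a minimal follow-up sketch that reuses the `text_model` instance from the snippet above; the captions are illustrative only.

```python
# Illustrative only: compare two captions with the encoder defined above.
captions = ["a diagram of the solar system", "a photo of a cat"]
embeds = text_model.extract_features(captions)["embeds"]  # shape: (2, hidden_dim)

# Embeddings are already unit-length, so the dot product is the cosine similarity.
similarity = embeds @ embeds.T
print(similarity)  # 2x2 matrix with ones on the diagonal
```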
## BibTeX & Citation
```bibtex
@misc{huang2024llm2clippowerfullanguagemodel,
      title={LLM2CLIP: Powerful Language Model Unlocks Richer Visual Representation},
author={Weiquan Huang and Aoqi Wu and Yifan Yang and Xufang Luo and Yuqing Yang and Liang Hu and Qi Dai and Xiyang Dai and Dongdong Chen and Chong Luo and Lili Qiu},
year={2024},
eprint={2411.04997},
archivePrefix={arXiv},
primaryClass={cs.CV},
url={https://arxiv.org/abs/2411.04997},
}
```