|
# GLM-4V-9B-4bits

## Quick Start
```python
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"

tokenizer = AutoTokenizer.from_pretrained("vcadillo/glm-4v-9b-4-bits", trust_remote_code=True)

# Build a single-turn chat prompt that pairs the image with the question.
query = 'describe this image'
image = Image.open("path/to/your_image.jpg").convert('RGB')
inputs = tokenizer.apply_chat_template(
    [{"role": "user", "image": image, "content": query}],
    add_generation_prompt=True,
    tokenize=True,
    return_tensors="pt",
    return_dict=True,
)  # chat mode
inputs = inputs.to(device)

# Load the 4-bit quantized weights; device_map='auto' places them on the GPU.
model = AutoModelForCausalLM.from_pretrained(
    "vcadillo/glm-4v-9b-4-bits",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    device_map='auto',
).eval()

gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
with torch.no_grad():
    outputs = model.generate(**inputs, **gen_kwargs)
    outputs = outputs[:, inputs['input_ids'].shape[1]:]  # drop the prompt tokens
    print(tokenizer.decode(outputs[0]))
```
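
For interactive use, output can also be streamed token by token with transformers' `TextStreamer`. The following is a minimal sketch that reuses `model`, `tokenizer`, and `inputs` from the snippet above; it assumes the model's custom generation code accepts the standard `streamer` argument, as the stock `generate` does.

```python
from transformers import TextStreamer

# Print tokens to stdout as they are generated, omitting the prompt
# and special tokens. Reuses model, tokenizer, and inputs from above.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

with torch.no_grad():
    model.generate(**inputs, streamer=streamer, max_length=2500, do_sample=True, top_k=1)
```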
|
|
|
## License

Use of the GLM-4 model weights must comply with the [LICENSE](LICENSE).
|
|
|
## Citation

If you find our work helpful, please consider citing the following papers.
|
|
|
```
@article{zeng2022glm,
  title={Glm-130b: An open bilingual pre-trained model},
  author={Zeng, Aohan and Liu, Xiao and Du, Zhengxiao and Wang, Zihan and Lai, Hanyu and Ding, Ming and Yang, Zhuoyi and Xu, Yifan and Zheng, Wendi and Xia, Xiao and others},
  journal={arXiv preprint arXiv:2210.02414},
  year={2022}
}
```

```
@inproceedings{du2022glm,
  title={GLM: General Language Model Pretraining with Autoregressive Blank Infilling},
  author={Du, Zhengxiao and Qian, Yujie and Liu, Xiao and Ding, Ming and Qiu, Jiezhong and Yang, Zhilin and Tang, Jie},
  booktitle={Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  pages={320--335},
  year={2022}
}
```

```
@misc{wang2023cogvlm,
  title={CogVLM: Visual Expert for Pretrained Language Models},
  author={Weihan Wang and Qingsong Lv and Wenmeng Yu and Wenyi Hong and Ji Qi and Yan Wang and Junhui Ji and Zhuoyi Yang and Lei Zhao and Xixuan Song and Jiazheng Xu and Bin Xu and Juanzi Li and Yuxiao Dong and Ming Ding and Jie Tang},
  year={2023},
  eprint={2311.03079},
  archivePrefix={arXiv},
  primaryClass={cs.CV}
}
```
|
|
|
|