The version dubbed Spoonbill Garuda uses the instruction-tuned sugiv/garuda-from-llama2-7B-chat as its language model. Spoonbill Garuda is also a vision-language model, trained on visual instruction datasets (a limited subset derived from Otter).
Please refer to the license of Llama 2, from which Garuda was derived using the Alpaca dataset.
The model is also fine-tuned on visual instruction tuning datasets derived from Otter's datasets.
@software{anas_awadalla_2023_7733589,
  author    = {Awadalla, Anas and Gao, Irena and Gardner, Joshua and Hessel, Jack and Hanafy, Yusuf and Zhu, Wanrong and Marathe, Kalyani and Bitton, Yonatan and Gadre, Samir and Jitsev, Jenia and Kornblith, Simon and Koh, Pang Wei and Ilharco, Gabriel and Wortsman, Mitchell and Schmidt, Ludwig},
  title     = {{OpenFlamingo}},
  month     = mar,
  year      = {2023},
  publisher = {Zenodo},
  version   = {v0.1.1},
  doi       = {10.5281/zenodo.7733589},
  url       = {https://doi.org/10.5281/zenodo.7733589},
}
@article{li2023otter,
  title         = {Otter: A Multi-Modal Model with In-Context Instruction Tuning},
  author        = {Li, Bo and Zhang, Yuanhan and Chen, Liangyu and Wang, Jinghao and Yang, Jingkang and Liu, Ziwei},
  year          = {2023},
  eprint        = {2305.03726},
  archivePrefix = {arXiv},
}
@article{li2023mimicit,
  title         = {{MIMIC-IT}: Multi-Modal In-Context Instruction Tuning},
  author        = {Li, Bo and Zhang, Yuanhan and Chen, Liangyu and Wang, Jinghao and Pu, Fanyi and Yang, Jingkang and Li, Chunyuan and Liu, Ziwei},
  year          = {2023},
  eprint        = {2306.05425},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
- Downloads last month
- 0