"""Environment setup and model bootstrap for a LLaVA-Med v1.5 (Mistral-7B) Gradio demo.

Installs the local package and a pinned bitsandbytes wheel at startup, then
loads the fine-tuned model in 4-bit NF4 quantization and opens the Gradio UI.
NOTE(review): runtime `pip` via os.system is fragile (no error handling, shell
strings) — preferably move these installs into the deployment image/requirements.
"""
import os

# Install the local package quietly, swap in the pinned bitsandbytes wheel.
# SECURITY/ROBUSTNESS NOTE: shell-string package management; return codes below
# are ignored, so a failed install proceeds silently.
os.system('pip install -q -e .')
# `-y` is required: without it `pip uninstall` prompts for confirmation and
# hangs/aborts in a non-interactive environment, so the uninstall never runs.
os.system('pip uninstall -y bitsandbytes')
os.system('pip install bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl')

# Must be set before torch initializes CUDA: lets the allocator grow segments
# instead of fragmenting, which helps large-model loads fit in VRAM.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch

print(torch.cuda.is_available())
# Sanity check that the freshly installed bitsandbytes binary loads.
print(os.system('python -m bitsandbytes'))

import gradio as gr
import io
from contextlib import redirect_stdout
import openai
from transformers import AutoTokenizer, BitsAndBytesConfig
from llava.model import LlavaMistralForCausalLM
from llava.eval.run_llava import eval_model

# ---- LLaVA-Med model setup ----
model_path = "Veda0718/llava-med-v1.5-mistral-7b-finetuned"

# 4-bit NF4 quantization with fp16 compute and double quantization; device_map
# "auto" shards across available GPUs/CPU as needed.
kwargs = {"device_map": "auto"}
kwargs['load_in_4bit'] = True
kwargs['quantization_config'] = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4'
)
model = LlavaMistralForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
# use_fast=False: LLaVA checkpoints expect the slow SentencePiece tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)

with gr.Blocks(theme=gr.themes.Monochrome()) as app:
    with gr.Column(scale=1):
        # TODO(review): SOURCE was truncated mid-string here — the original
        # Markdown content is unknown; restore it from the full file.
        gr.Markdown("")