John6666 committed on
Commit
24e23b4
·
verified ·
1 Parent(s): b27b4a2

Upload 3 files

Browse files
Files changed (2) hide show
  1. app.py +5 -11
  2. requirements.txt +1 -1
app.py CHANGED
@@ -6,33 +6,27 @@ from threading import Thread
6
  from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
7
  from PIL import ImageDraw
8
  from torchvision.transforms.v2 import Resize
9
- from optimum.onnxruntime import ORTModelForImageClassification
10
  import subprocess
11
  #subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
14
 
15
- #model_id = "vikhyatk/moondream2"
16
- model_id = "Xenova/moondream2"
17
  #model_id = "zesquirrelnator/moondream2-finetuneV2"
18
  #revision = "2024-08-26"
19
  #tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
20
  tokenizer = AutoTokenizer.from_pretrained(model_id)
21
- #moondream = AutoModelForCausalLM.from_pretrained(
22
- # model_id, trust_remote_code=True, #revision=revision,
23
- # torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32, #device_map="auto",
24
  #ignore_mismatched_sizes=True,
25
  #attn_implementation="flash_attention_2"
26
- #).to(device)
27
- moondream = ORTModelForImageClassification.from_pretrained(
28
- model_id, trust_remote_code=True,
29
  ).to(device)
30
-
31
  moondream.eval()
32
  #moondream.to_bettertransformer()
33
 
34
  #@spaces.GPU
35
- @torch.inference_mode()
36
  def answer_question(img, prompt):
37
  image_embeds = moondream.encode_image(img)
38
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
 
6
  from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
7
  from PIL import ImageDraw
8
  from torchvision.transforms.v2 import Resize
9
+
10
  import subprocess
11
  #subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
14
 
15
+ model_id = "vikhyatk/moondream2"
 
16
  #model_id = "zesquirrelnator/moondream2-finetuneV2"
17
  #revision = "2024-08-26"
18
  #tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
19
  tokenizer = AutoTokenizer.from_pretrained(model_id)
20
+ moondream = AutoModelForCausalLM.from_pretrained(
21
+ model_id, trust_remote_code=True, #revision=revision,
22
+ torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32, #device_map="auto",
23
  #ignore_mismatched_sizes=True,
24
  #attn_implementation="flash_attention_2"
 
 
 
25
  ).to(device)
 
26
  moondream.eval()
27
  #moondream.to_bettertransformer()
28
 
29
  #@spaces.GPU
 
30
  def answer_question(img, prompt):
31
  image_embeds = moondream.encode_image(img)
32
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
requirements.txt CHANGED
@@ -5,5 +5,5 @@ accelerate>=0.32.1
5
  numpy<2
6
  torch==2.4.0
7
  torchvision
8
- optimum[onnxruntime]
9
  intel_extension_for_pytorch
 
5
  numpy<2
6
  torch==2.4.0
7
  torchvision
8
+ optimum
9
  intel_extension_for_pytorch