Update app.py
app.py CHANGED
@@ -10,7 +10,7 @@ import spaces
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
-
+torch.set_default_device('cuda')
 
 tokenizer = AutoTokenizer.from_pretrained(
     'qnguyen3/nanoLLaVA',
@@ -38,7 +38,8 @@ class KeywordsStoppingCriteria(StoppingCriteria):
             self.keyword_ids.append(torch.tensor(cur_keyword_ids))
         self.tokenizer = tokenizer
         self.start_len = input_ids.shape[1]
-
+
+    @spaces.GPU
     def call_for_batch(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
         offset = min(output_ids.shape[1] - self.start_len, self.max_keyword_len)
         self.keyword_ids = [keyword_id.to(output_ids.device) for keyword_id in self.keyword_ids]
@@ -51,7 +52,8 @@ class KeywordsStoppingCriteria(StoppingCriteria):
             if keyword in outputs:
                 return True
         return False
-
+
+    @spaces.GPU
     def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
         outputs = []
         for i in range(output_ids.shape[0]):
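The change is small: the runtime flash-attn install stays as-is, a blank line is replaced with torch.set_default_device('cuda'), and both call_for_batch and __call__ of KeywordsStoppingCriteria gain the @spaces.GPU decorator, which on ZeroGPU hardware requests a GPU allocation for the duration of each decorated call.

For context, below is a minimal sketch of how a KeywordsStoppingCriteria instance is typically wired into generation on a ZeroGPU Space. It assumes the class definition from the app.py shown above; the generate function name, prompt, '<|im_end|>' stop keyword, dtype, and generation parameters are illustrative assumptions, not part of this commit.

# Minimal usage sketch. Assumptions: KeywordsStoppingCriteria is the class
# modified in the diff above; everything marked "illustrative" is not from
# the commit.
import torch
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteriaList

torch.set_default_device('cuda')  # mirrors the line added in the diff

tokenizer = AutoTokenizer.from_pretrained('qnguyen3/nanoLLaVA', trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    'qnguyen3/nanoLLaVA',
    torch_dtype=torch.float16,      # illustrative
    trust_remote_code=True,
)

@spaces.GPU  # same decorator the commit adds to the stopping-criteria methods
def generate(prompt: str) -> str:
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(model.device)
    # Stop generation as soon as the assumed end-of-turn marker is produced.
    stopping = StoppingCriteriaList(
        [KeywordsStoppingCriteria(['<|im_end|>'], tokenizer, input_ids)]
    )
    output_ids = model.generate(
        input_ids,
        max_new_tokens=256,          # illustrative
        stopping_criteria=stopping,
    )
    return tokenizer.decode(output_ids[0, input_ids.shape[1]:], skip_special_tokens=True)

Decorating a top-level inference function such as generate is the common ZeroGPU pattern; this commit applies the same decorator directly to the two stopping-criteria methods instead.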