dwb2023 committed on
Commit
b3ee19f
·
verified ·
1 Parent(s): 68bab0c

Update app.py

Browse files

change bnb config

Files changed (1) hide show
  1. app.py +11 -2
app.py CHANGED
@@ -18,7 +18,7 @@ from transformers import (
18
  )
19
  from transformers.pipelines.audio_utils import ffmpeg_read
20
 
21
- # import torch # If you're using PyTorch
22
  import spaces
23
 
24
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
@@ -28,12 +28,21 @@ BATCH_SIZE = 8
28
  YT_LENGTH_LIMIT_S = 4800 # 1 hour 20 minutes
29
 
30
  # Quantization
31
- bnb_config = BitsAndBytesConfig(load_in_4bit=True)
 
 
 
 
 
 
 
32
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
33
  MODEL_NAME,
34
  quantization_config=bnb_config,
 
35
  device_map="auto"
36
  )
 
37
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
38
  feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
39
 
 
18
  )
19
  from transformers.pipelines.audio_utils import ffmpeg_read
20
 
21
+ import torch # If you're using PyTorch
22
  import spaces
23
 
24
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 
28
  YT_LENGTH_LIMIT_S = 4800 # 1 hour 20 minutes
29
 
30
  # Quantization
31
+
32
+ bnb_config = BitsAndBytesConfig(
33
+ load_in_4bit=True,
34
+ bnb_4bit_use_double_quant=True,
35
+ bnb_4bit_quant_type="nf4",
36
+ bnb_4bit_compute_dtype=torch.bfloat16
37
+ )
38
+
39
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
40
  MODEL_NAME,
41
  quantization_config=bnb_config,
42
+ use_cache=False,
43
  device_map="auto"
44
  )
45
+
46
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
47
  feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
48