sdinger committed on
Commit
c6bae0c
1 Parent(s): ad2b74f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +29 -0
README.md CHANGED
@@ -1,6 +1,32 @@
1
  ---
2
  library_name: peft
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  ## Training procedure
5
 
6
 
@@ -18,3 +44,6 @@ The following `bitsandbytes` quantization config was used during training:
18
 
19
 
20
  - PEFT 0.5.0
 
 
 
 
1
  ---
2
  library_name: peft
3
+
4
  ---
5
+
6
+ ## Config
7
+ model_name_or_path = "openai/whisper-large-v2"
8
+ language = "Marathi"
9
+ language_abbr = "mr"
10
+ task = "transcribe"
11
+ dataset_name = "mozilla-foundation/common_voice_11_0"
12
+
13
+ feature_extractor = AutoFeatureExtractor.from_pretrained(model_name_or_path)
14
+ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, language=language, task=task)
15
+ processor = AutoProcessor.from_pretrained(model_name_or_path, language=language, task=task)
16
+
17
+
18
+ common_voice["train"] = load_dataset(dataset_name, language_abbr, split="train+validation", use_auth_token=True)
19
+ common_voice["test"] = load_dataset(dataset_name, language_abbr, split="test", use_auth_token=True)
20
+
21
+
22
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name_or_path, load_in_8bit=True, device_map="auto")
23
+ config = LoraConfig(r=32, lora_alpha=64, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")
24
+
25
+ model = get_peft_model(model, config)
26
+ model.print_trainable_parameters()
27
+ #"trainable params: 15728640 || all params: 1559033600 || trainable%: 1.0088711365810203"
28
+
29
+
30
  ## Training procedure
31
 
32
 
 
44
 
45
 
46
  - PEFT 0.5.0
47
+
48
+
49
+ wer=38.514602540132806