sdinger committed on
Commit
ea59cf3
1 Parent(s): 9e6eb69

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +21 -21
README.md CHANGED
@@ -4,30 +4,30 @@ library_name: peft
4
  ---
5
 
6
  ## Config
 
 
 
 
 
 
7
 
8
- model_name_or_path = "openai/whisper-large-v2"
9
- language = "Marathi"
10
- language_abbr = "mr"
11
- task = "transcribe"
12
- dataset_name = "mozilla-foundation/common_voice_11_0"
13
-
14
- feature_extractor = AutoFeatureExtractor.from_pretrained(model_name_or_path)
15
- tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, language=language, task=task)
16
- processor = AutoProcessor.from_pretrained(model_name_or_path, language=language, task=task)
17
-
18
-
19
- common_voice["train"] = load_dataset(dataset_name, language_abbr, split="train+validation", use_auth_token=True)
20
- common_voice["test"] = load_dataset(dataset_name, language_abbr, split="test", use_auth_token=True)
21
-
22
-
23
- model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name_or_path, load_in_8bit=True, device_map="auto")
24
- config = LoraConfig(r=32, lora_alpha=64, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")
25
-
26
- model = get_peft_model(model, config)
27
- model.print_trainable_parameters()
28
- #"trainable params: 15728640 || all params: 1559033600 || trainable%: 1.0088711365810203"
29
 
30
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  ## Training procedure
32
 
33
 
 
4
  ---
5
 
6
  ## Config
7
+ ```python
8
+ model_name_or_path = "openai/whisper-large-v2"
9
+ language = "Marathi"
10
+ language_abbr = "mr"
11
+ task = "transcribe"
12
+ dataset_name = "mozilla-foundation/common_voice_11_0"
13
 
14
+ feature_extractor = AutoFeatureExtractor.from_pretrained(model_name_or_path)
15
+ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, language=language, task=task)
16
+ processor = AutoProcessor.from_pretrained(model_name_or_path, language=language, task=task)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
+ common_voice["train"] = load_dataset(dataset_name, language_abbr, split="train+validation", use_auth_token=True)
20
+ common_voice["test"] = load_dataset(dataset_name, language_abbr, split="test", use_auth_token=True)
21
+
22
+
23
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name_or_path, load_in_8bit=True, device_map="auto")
24
+ config = LoraConfig(r=32, lora_alpha=64, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")
25
+
26
+ model = get_peft_model(model, config)
27
+ model.print_trainable_parameters()
28
+ #"trainable params: 15728640 || all params: 1559033600 || trainable%: 1.0088711365810203"
29
+ ```
30
+
31
  ## Training procedure
32
 
33