Shubham09 committed on
Commit
cb420c2
·
1 Parent(s): 93ad2c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -8,9 +8,9 @@ nltk.download("punkt")
8
 
9
 
10
  model_name = "Shubham09/whisper31filescheck"
11
- processor = WhisperProcessor.from_pretrained(model_name)
12
- tokenizer = WhisperTokenizer.from_pretrained(model_name)
13
- model = WhisperForConditionalGeneration.from_pretrained(model_name,decoder_input_ids)
14
 
15
  def load_data(input_file):
16
 
@@ -33,14 +33,14 @@ def asr_transcript(input_file):
33
 
34
  speech = load_data(input_file)
35
  #Tokenize
36
- input_features = processor(speech, return_tensors="pt").input_features #, padding="longest" , return_tensors="pt"
37
  #input_values = tokenizer(speech, return_tensors="pt").input_values
38
  #Take logits
39
  logits = model(input_features).logits
40
  #Take argmax
41
  predicted_ids = torch.argmax(logits, dim=-1)
42
  #Get the words from predicted word ids
43
- transcription = processor.batch_decode(predicted_ids[0])
44
  #Correcting the letter casing
45
  #transcription = correct_casing(transcription.lower())
46
  return transcription
 
8
 
9
 
10
  model_name = "Shubham09/whisper31filescheck"
11
+ processor = WhisperProcessor.from_pretrained(model_name,task="transcribe")
12
+ #tokenizer = WhisperTokenizer.from_pretrained(model_name)
13
+ model = WhisperForConditionalGeneration.from_pretrained(model_name)
14
 
15
  def load_data(input_file):
16
 
 
33
 
34
  speech = load_data(input_file)
35
  #Tokenize
36
+ input_features = processor(speech).input_features #, padding="longest" , return_tensors="pt"
37
  #input_values = tokenizer(speech, return_tensors="pt").input_values
38
  #Take logits
39
  logits = model(input_features).logits
40
  #Take argmax
41
  predicted_ids = torch.argmax(logits, dim=-1)
42
  #Get the words from predicted word ids
43
+ transcription = processor.batch_decode(predicted_ids)
44
  #Correcting the letter casing
45
  #transcription = correct_casing(transcription.lower())
46
  return transcription