Kr08 committed
Commit 6241aa9 · verified · Parent: eaed8a8

alternate translation model

Files changed (1): app.py (+12 -8)
app.py CHANGED
@@ -22,14 +22,14 @@ def load_translation_model() :
     return model, tokenizer
 
 
-def alternate_translation(inputs):
-    model, tokenizer = load_translation_model()
-    tokenized_inputs = tokenizer(inputs, return_tensors='pt')
+def alternate_translation(translation_model, translation_tokenizer, inputs):
+    # model, tokenizer = load_translation_model()
+    tokenized_inputs = translation_tokenizer(inputs, return_tensors='pt')
 
     answer = ""
     # for
-    translated_tokens = model.generate(**tokenized_inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids("eng_Latn"), max_length=100)
-    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
+    translated_tokens = translation_model.generate(**tokenized_inputs, forced_bos_token_id=translation_tokenizer.convert_tokens_to_ids("eng_Latn"), max_length=100)
+    return translation_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
 
 
 def load_qa_model():
@@ -107,7 +107,8 @@ def process_and_summarize(audio_file, translate, model_size, do_summarize=True):
     logger.info(f"Starting process_and_summarize: translate={translate}, model_size={model_size}, do_summarize={do_summarize}")
     try:
         language_segments, final_segments = transcribe_audio(audio_file, translate, model_size)
-
+
+        translation_model, translation_tokenizer = load_translation_model()
         # transcription = "Detected language changes:\n\n"
         transcription = ""
         for segment in language_segments:
@@ -120,8 +121,11 @@ def process_and_summarize(audio_file, translate, model_size, do_summarize=True):
             transcription += f"[{segment['start']:.2f}s - {segment['end']:.2f}s] ({segment['language']}) {segment['speaker']}:\n"
             transcription += f"Original: {segment['text']}\n"
             if translate:
-                transcription += f"Translated: {segment['translated']}\n"
-                full_text += segment['translated'] + " "
+                alt_trans = alternate_translation(translation_model, translation_tokenizer, segment['text'])
+                transcription += f"Translated: {alt_trans}\n"
+                full_text += alt_trans + " "
+                # transcription += f"Translated: {segment['translated']}\n"
+                # full_text += segment['translated'] + " "
             else:
                 full_text += segment['text'] + " "
             transcription += "\n"
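
For reference, a minimal, self-contained sketch of how the refactored pieces are meant to fit together: the translation model is loaded once and then passed into alternate_translation for every segment, instead of being reloaded on each call as the old alternate_translation(inputs) did. The checkpoint name below is an assumption: the "eng_Latn" forced BOS token points at an NLLB-200-style model, but load_translation_model() in app.py may load a different checkpoint.

# Minimal sketch only. Assumptions: a transformers seq2seq checkpoint that
# understands NLLB language codes; "facebook/nllb-200-distilled-600M" is a
# guess, not necessarily what app.py's load_translation_model() uses.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

def load_translation_model():
    model_name = "facebook/nllb-200-distilled-600M"  # assumed checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return model, tokenizer

def alternate_translation(translation_model, translation_tokenizer, inputs):
    # Tokenize the source text and force the decoder to start in English.
    tokenized_inputs = translation_tokenizer(inputs, return_tensors="pt")
    translated_tokens = translation_model.generate(
        **tokenized_inputs,
        forced_bos_token_id=translation_tokenizer.convert_tokens_to_ids("eng_Latn"),
        max_length=100,
    )
    return translation_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]

# Load once, reuse per segment -- the point of this commit.
translation_model, translation_tokenizer = load_translation_model()
for text in ["Hallo zusammen, wie geht es euch?", "Bonjour tout le monde."]:
    print(alternate_translation(translation_model, translation_tokenizer, text))

Loading the checkpoint once outside the per-segment loop is the main behavioural change here; the old code called load_translation_model() inside alternate_translation, so every translated segment paid the full model-initialisation cost.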