alternate translation model
app.py CHANGED

@@ -22,14 +22,14 @@ def load_translation_model():
     return model, tokenizer
 
 
-def alternate_translation(inputs):
-    model, tokenizer = load_translation_model()
-    tokenized_inputs = 
+def alternate_translation(translation_model, translation_tokenizer, inputs):
+    # model, tokenizer = load_translation_model()
+    tokenized_inputs = translation_tokenizer(inputs, return_tensors='pt')
 
     answer = ""
     # for
-    translated_tokens = 
-    return 
+    translated_tokens = translation_model.generate(**tokenized_inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids("eng_Latn"), max_length=100)
+    return translation_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
 
 
 def load_qa_model():
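
Note on the new helper: inside the added `alternate_translation`, the `generate()` call still references `tokenizer`, which is no longer in scope after the signature change (only `translation_model` and `translation_tokenizer` are available there), so the function would raise a NameError once translation is triggered. Below is a minimal, self-contained sketch of what the helper appears to intend. It assumes the translation model is an NLLB-style checkpoint (suggested by the `eng_Latn` forced BOS token); the checkpoint name is a placeholder, since `load_translation_model()` is not shown in this diff.

# Hedged sketch, not the committed implementation.
# Assumptions: NLLB-style seq2seq checkpoint (the name below is a placeholder),
# and `translation_tokenizer` used in place of the out-of-scope `tokenizer`.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

def load_translation_model():
    model_name = "facebook/nllb-200-distilled-600M"  # placeholder checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return model, tokenizer

def alternate_translation(translation_model, translation_tokenizer, inputs):
    # Tokenize the source text; for non-English input the tokenizer's
    # src_lang should ideally be set to the segment's NLLB language code.
    tokenized_inputs = translation_tokenizer(inputs, return_tensors="pt")
    translated_tokens = translation_model.generate(
        **tokenized_inputs,
        # "eng_Latn" is the NLLB/FLORES-200 code for English; use the
        # tokenizer passed into the function rather than a global `tokenizer`.
        forced_bos_token_id=translation_tokenizer.convert_tokens_to_ids("eng_Latn"),
        max_length=100,
    )
    return translation_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
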
@@ -107,7 +107,8 @@ def process_and_summarize(audio_file, translate, model_size, do_summarize=True):
     logger.info(f"Starting process_and_summarize: translate={translate}, model_size={model_size}, do_summarize={do_summarize}")
     try:
         language_segments, final_segments = transcribe_audio(audio_file, translate, model_size)
-
+
+        translation_model, translation_tokenizer = load_translation_model()
         # transcription = "Detected language changes:\n\n"
         transcription = ""
         for segment in language_segments:
@@ -120,8 +121,11 @@ def process_and_summarize(audio_file, translate, model_size, do_summarize=True):
             transcription += f"[{segment['start']:.2f}s - {segment['end']:.2f}s] ({segment['language']}) {segment['speaker']}:\n"
             transcription += f"Original: {segment['text']}\n"
             if translate:
-
-
+                alt_trans=alternate_translation(translation_model, translation_tokenizer, segment['text'])
+                transcription += f"Translated:{alt_trans}"
+                full_text += alt_trans
+                # transcription += f"Translated: {segment['translated']}\n"
+                # full_text += segment['translated'] + " "
             else:
                 full_text += segment['text'] + " "
             transcription += "\n"
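
For context, the second and third hunks wire the helper into `process_and_summarize`: the model and tokenizer are loaded once before the segment loop and passed in per segment. A hedged sketch of that call pattern follows; the segment fields are inferred from the f-strings in the diff, and the sample data is illustrative only. Two small consistency points visible in the committed lines: the `Translated:` line drops the trailing newline that the `Original:` line has, and `full_text += alt_trans` omits the trailing space used in the `else` branch.

# Illustrative sketch of the per-segment loop from the hunks above.
# `load_translation_model` and `alternate_translation` are as sketched earlier.
language_segments = [  # sample data; real segments come from transcribe_audio()
    {"start": 0.0, "end": 2.5, "language": "fr", "speaker": "SPEAKER_00",
     "text": "Bonjour tout le monde."},
]
translate = True

translation_model, translation_tokenizer = load_translation_model()  # load once, outside the loop
transcription, full_text = "", ""
for segment in language_segments:
    transcription += f"[{segment['start']:.2f}s - {segment['end']:.2f}s] ({segment['language']}) {segment['speaker']}:\n"
    transcription += f"Original: {segment['text']}\n"
    if translate:
        alt_trans = alternate_translation(translation_model, translation_tokenizer, segment['text'])
        transcription += f"Translated: {alt_trans}\n"  # adds the "\n" the committed line omits
        full_text += alt_trans + " "                   # adds the " " separator the committed line omits
    else:
        full_text += segment['text'] + " "
    transcription += "\n"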