import gradio as gr

############### VANILLA INFERENCE ###############

# from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
# src_lang = "ru"
# # "zu" appears to be repurposed here as the target code for Kabardian (kbd),
# # which is not among M2M100's built-in language codes.
# tgt_lang = "zu"

# # tokenizer = AutoTokenizer.from_pretrained(model_path, src_lang=src_lang)
# tokenizer = AutoTokenizer.from_pretrained(model_path)
# model = AutoModelForSeq2SeqLM.from_pretrained(model_path, use_safetensors=True)  # , load_in_4bit=True, device_map="auto"
# model.to_bettertransformer()

# def translate(text, num_beams=4, num_return_sequences=4):
#     inputs = tokenizer(text, return_tensors="pt")
#     num_return_sequences = min(num_return_sequences, num_beams)

#     translated_tokens = model.generate(
#         **inputs,
#         forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
#         num_beams=num_beams,
#         num_return_sequences=num_return_sequences,
#     )

#     translations = []
#     for translation in tokenizer.batch_decode(translated_tokens, skip_special_tokens=True):
#         translations.append(translation)

#     # result = {"input": text, "translations": translations}

#     return text, translations


############### IPEX OPTIMIZED INFERENCE ###############

# from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# from optimum.bettertransformer import BetterTransformer
# import intel_extension_for_pytorch as ipex
# from transformers.modeling_outputs import BaseModelOutput
# import torch

# model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
# src_lang = "ru"
# tgt_lang = "zu"

# tokenizer = AutoTokenizer.from_pretrained(model_path)
# model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

# # Flash attention optimization
# model = BetterTransformer.transform(model, keep_original_model=False)

# # IPEX optimization
# model.eval()
# model = ipex.optimize(model, dtype=torch.float, level="O1", conv_bn_folding=False, inplace=True)

# # Get the encoder
# encoder = model.get_encoder()

# # Prepare an example input for the encoder
# example_input_text = "Example text in Russian"
# inputs_example = tokenizer(example_input_text, return_tensors="pt")

# # Trace just the encoder with strict=False
# scripted_encoder = torch.jit.trace(encoder, inputs_example['input_ids'], strict=False)

# def translate(text, num_beams=4, num_return_sequences=4):
#     inputs = tokenizer(text, return_tensors="pt")
#     num_return_sequences = min(num_return_sequences, num_beams)

#     # Use the scripted encoder for the first step of inference
#     encoder_output_dict = scripted_encoder(inputs['input_ids'])
#     encoder_outputs = BaseModelOutput(last_hidden_state=encoder_output_dict['last_hidden_state'])

#     # Use the original, untraced model for the second step, passing the encoder's outputs as inputs
#     translated_tokens = model.generate(
#         encoder_outputs=encoder_outputs,
#         forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
#         num_beams=num_beams,
#         num_return_sequences=num_return_sequences,
#     )

#     translations = [tokenizer.decode(translation, skip_special_tokens=True) for translation in translated_tokens]

#     return text, translations


############### ONNX MODEL INFERENCE ###############

# from transformers import AutoTokenizer, pipeline
# from optimum.onnxruntime import ORTModelForSeq2SeqLM

# model_id = "anzorq/m2m100_418M_ft_ru-kbd_44K"
# model = ORTModelForSeq2SeqLM.from_pretrained(model_id, subfolder="onnx", file_name="encoder_model_optimized.onnx")
# tokenizer = AutoTokenizer.from_pretrained(model_id)

# def translate(text, num_beams=4, num_return_sequences=4):
#     inputs = tokenizer(text, return_tensors="pt")
#     num_return_sequences = min(num_return_sequences, num_beams)
#     translated_tokens = model.generate(
#         **inputs,
#         forced_bos_token_id=tokenizer.lang_code_to_id["zu"],
#         num_beams=num_beams,
#         num_return_sequences=num_return_sequences,
#     )

#     translations = []
#     for translation in tokenizer.batch_decode(translated_tokens, skip_special_tokens=True):
#         translations.append(translation)

#     return text, translations


############### CTRANSLATE2 INFERENCE ###############

import ctranslate2
import transformers

translator = ctranslate2.Translator("ctranslate2")
tokenizer = transformers.AutoTokenizer.from_pretrained("anzorq/m2m100_418M_ft_ru-kbd_44K")
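# The "ctranslate2" directory above is assumed to hold a CTranslate2 conversion
# of the checkpoint, produced beforehand with something along these lines
# (a sketch, not part of the app itself):
#
#   ct2-transformers-converter --model anzorq/m2m100_418M_ft_ru-kbd_44K --output_dir ctranslate2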

def translate(text, num_beams=4, num_return_sequences=4):
    num_return_sequences = min(num_return_sequences, num_beams)

    source = tokenizer.convert_ids_to_tokens(tokenizer.encode(text))
    target_prefix = [tokenizer.lang_code_to_token["zu"]]
    results = translator.translate_batch(
        [source],
        target_prefix=[target_prefix],
        beam_size=num_beams,
        num_hypotheses=num_return_sequences,
    )

    translations = []
    for hypothesis in results[0].hypotheses:
        # Drop the leading target-language token before decoding
        target = hypothesis[1:]
        decoded_sentence = tokenizer.decode(tokenizer.convert_tokens_to_ids(target))
        translations.append(decoded_sentence)

    return text, translations
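# Illustrative check of the CTranslate2 path (assumes the converted model is
# present; the output depends entirely on the model):
# >>> _, hypotheses = translate("Мы идем домой", num_beams=4, num_return_sequences=2)
# >>> print(hypotheses[0])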
" # examples = [ # ["Мы идем домой"], # ["Сегодня хорошая погода"], # ["Дети играют во дворе"], # ["We live in a big house"], # ["Tu es une bonne personne."], # ["أين تعيش؟"], # ["Bir şeyler yapmak istiyorum."], # ["– Если я его отпущу, то ты вовек не сможешь его поймать, – заявил Сосруко."], # ["Как только старик ушел, Сатаней пошла к Саусырыко."], # ["我永远不会放弃你。"], # ["우리는 소치에 살고 있습니다."], # ] gr.Interface( fn=translate, inputs=["text", num_beams, num_return_sequences], outputs=["text", output], title=title, # examples=examples, article=article).launch() # import gradio as gr # title = "Русско-черкесский переводчик" # description = "Demo of a Russian-Circassian (Kabardian dialect) translator.