# Gradio app for the Hugging Face Space Mamadou2727/Feriji_Translator.

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch

# Pick the device first so the model can be placed on it at load time.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Fine-tuned French->Zarma weights. The model must live on the same device
# the input tensors are sent to; otherwise generation fails on a CUDA host
# with a device-mismatch error.
model = AutoModelForSeq2SeqLM.from_pretrained("Mamadou2727/Feriji_model").to(device)

# The tokenizer is loaded from the base M2M100 checkpoint rather than from the
# fine-tuned repository.
# NOTE(review): presumably the fine-tuned repo ships no tokenizer files of its
# own -- confirm.
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")

# Display name -> tokenizer language code.
# NOTE(review): "yo" is M2M100's Yoruba code; it appears to be reused here as
# the slot for Zarma, which has no code of its own in the base tokenizer --
# confirm against how the model was fine-tuned.
LANG_CODES = {
    "French": "fr",
    "Zarma": "yo"
}

def translate(text, candidates: int):
    """
    Translate French text to Zarma and return the candidate translations.

    Args:
        text: French source text. ``None`` or whitespace-only input yields
            an empty string without running the model.
        candidates: Number of translations to return; also used as the beam
            width. Coerced to an ``int`` of at least 1 in case the caller
            passes a float or a non-positive value.

    Returns:
        The decoded candidate translations joined by newlines, one per line.
    """
    # Nothing to translate: skip tokenization and generation entirely.
    if text is None or not text.strip():
        return ''

    # Beam search needs an integer beam count, and the number of returned
    # sequences may not exceed it, so one sanitized value drives both.
    n_candidates = max(1, int(candidates))

    src = LANG_CODES["French"]
    tgt = LANG_CODES["Zarma"]

    tokenizer.src_lang = src
    tokenizer.tgt_lang = tgt

    # Send the inputs to wherever the model actually lives, so the two can
    # never end up on different devices.
    ins = tokenizer(text, return_tensors='pt').to(model.device)

    # Only the generated sequences are decoded below, so per-step scores and
    # hidden states are not requested.
    gen_args = {
        'return_dict_in_generate': True,
        # Exponent applied to sequence length when scoring beams; 0.0 turns
        # length normalization off.
        'length_penalty': 0.0,
        'num_return_sequences': n_candidates,
        'num_beams': n_candidates,
        # Force the decoder to start with the target-language token.
        'forced_bos_token_id': tokenizer.lang_code_to_id[tgt]
    }

    outs = model.generate(**ins, **gen_args)
    output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)

    return '\n'.join(output)

# Build the Gradio interface: the project description and the translation
# controls are placed together in a single row.
with gr.Blocks() as app:
    # Raw string so the backslash in the BibTeX "\url{...}" entry below is kept
    # literally instead of being parsed as an escape sequence.
    markdown = r"""
        # FERIJI Translator, The First French-Zarma Translator        
        <img src="https://cdn-uploads.huggingface.co/production/uploads/63cc1d4bf488db9bb3c6449e/AtOKLAaL5kt0VhRsxE0vf.png" width="500" height="300">
        
        This is a beta version of the French to Zarma translator.
            
        ## Intended Uses & Limitations        
        
        This model is intended for academic research and practical applications in machine translation. It can be used to translate French text to Zarma and vice versa. Users should note that the model's performance may vary based on the complexity and context of the input text.
            
        ## Authors:
        The project, **FERIJI**, was curated by **Elysabhete Ibrahim Amadou**, **Habibatou Abdoulaye Alfari**, **Adwoa Bremang**, **Dennis Owusu**, **Mamadou K. KEITA** and **Dr Christopher Homan**, with the aim to enhance linguistic studies for Zarma.
            
        ## Citations
            
        If you use this dataset or model in your research, please cite it as follows:
            
        @dataset{Feriji,
          author       = {Habibatou Abdoulaye Alfari, Elysabhete Ibrahim Amadou and Mamadou K. KEITA},
          title        = {Feriji, a French-Zarma Parallel Corpus},
          year         = 2023,
          publisher    = {GitHub},
          journal      = {GitHub repository},
          howpublished = {\url{https://github.com/27-GROUP/Feriji}}
        }
    """

    with gr.Row():
        # First item in the row: the rendered project description.
        gr.Markdown(markdown)
        # Second item in the row: the translation controls, stacked in a column.
        with gr.Column():
            input_text = gr.components.Textbox(lines=7, label="Français/French", value="")
            # The slider value is passed to translate() as `candidates`.
            return_seqs = gr.Slider(label="Number of return sequences", value=1, minimum=1, maximum=12, step=1)
            outputs = gr.Textbox(lines=7, label="Zarma")

            translate_btn = gr.Button("Traduis!")
            # Clicking the button calls translate(text, candidates) with the
            # textbox and slider values and writes the result to the Zarma box.
            translate_btn.click(translate, inputs=[input_text, return_seqs], outputs=outputs)

# Start the app at import time; share=True additionally requests a public
# share link from Gradio.
app.launch(share=True)