Mamadou2727 committed on
Commit
4dcbdb7
1 Parent(s): c1f7cbf

Update app

Browse files
Files changed (1) hide show
  1. app.py +12 -37
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
3
  import torch
4
 
 
5
  model = AutoModelForSeq2SeqLM.from_pretrained("Mamadou2727/Feriji_model")
6
  tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
7
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -10,18 +11,21 @@ LANG_CODES = {
10
  "Zarma": "yo"
11
  }
12
 
13
- def translate(text, candidates: int):
14
  """
15
- Translate the text from French to Zarma
16
  """
17
-
18
  src = LANG_CODES["French"]
19
  tgt = LANG_CODES["Zarma"]
20
-
21
  # If a file is uploaded, use its content for translation
22
  if file is not None:
23
  text = file.read().decode("utf-8")
24
-
 
 
 
 
25
  tokenizer.src_lang = src
26
  tokenizer.tgt_lang = tgt
27
 
@@ -31,7 +35,7 @@ def translate(text, candidates: int):
31
  'return_dict_in_generate': True,
32
  'output_scores': True,
33
  'output_hidden_states': True,
34
- 'length_penalty': 0.0, # don't encourage longer or shorter output,
35
  'num_return_sequences': candidates,
36
  'num_beams': candidates,
37
  'forced_bos_token_id': tokenizer.lang_code_to_id[tgt]
@@ -43,43 +47,14 @@ def translate(text, candidates: int):
43
  return '\n'.join(output)
44
 
45
  with gr.Blocks() as app:
46
- markdown = r"""
47
- # Feriji-fr-to-dje v.1.1, Proudly made by Elysabhete, Habibatou & Mamadou K.
48
-
49
- <img src="https://cdn-uploads.huggingface.co/production/uploads/63cc1d4bf488db9bb3c6449e/AtOKLAaL5kt0VhRsxE0vf.png" width="500" height="300">
50
-
51
- Feriji-fr-to-dje is a beta version of the French to Zarma translator.
52
-
53
- ## Intended Uses & Limitations
54
-
55
- This model is intended for academic research and practical applications in machine translation. It can be used to translate French text to Zarma and vice versa. Users should note that the model's performance may vary based on the complexity and context of the input text.
56
-
57
- ## Authors:
58
- The project, **Feriji dataset and Feriji-fr-to-dje**, was curated by **Elysabhete Ibrahim Amadou** and **Mamadou K. KEITA**, with the aim to enhance linguistic studies and translation capabilities between French and Zarma.
59
-
60
- ## Citations
61
-
62
- If you use this dataset or model in your research, please cite it as follows:
63
-
64
- @dataset{Feriji,
65
- author = {Habibatou Abdoulaye Alfari, Elysabhete Ibrahim Amadou and Mamadou K. KEITA},
66
- title = {Feriji, a French-Zarma Parallel Corpus},
67
- year = 2023,
68
- publisher = {GitHub},
69
- journal = {GitHub repository},
70
- howpublished = {\url{https://github.com/27-GROUP/Feriji}}
71
- }
72
- """
73
-
74
  with gr.Row():
75
- gr.Markdown(markdown)
76
  with gr.Column():
77
  input_text = gr.components.Textbox(lines=7, label="Input Text", value="")
78
  upload_file = gr.File(label="Upload File")
79
  return_seqs = gr.Slider(label="Number of return sequences", value=1, minimum=1, maximum=12, step=1)
80
  outputs = gr.Textbox(lines=7, label="Output Text")
81
-
82
  translate_btn = gr.Button("Traduis!")
83
  translate_btn.click(translate, inputs=[input_text, upload_file, return_seqs], outputs=outputs)
84
-
85
  app.launch(share=True)
 
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
3
  import torch
4
 
5
+ # Load the model and tokenizer
6
  model = AutoModelForSeq2SeqLM.from_pretrained("Mamadou2727/Feriji_model")
7
  tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
8
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
11
  "Zarma": "yo"
12
  }
13
 
14
+ def translate(text, file, candidates: int):
15
  """
16
+ Translate the text from French to Zarma. Can use text input or uploaded file.
17
  """
 
18
  src = LANG_CODES["French"]
19
  tgt = LANG_CODES["Zarma"]
20
+
21
  # If a file is uploaded, use its content for translation
22
  if file is not None:
23
  text = file.read().decode("utf-8")
24
+
25
+ # If there's no text input or file content, return an error message or empty string
26
+ if not text.strip():
27
+ return "No text provided for translation."
28
+
29
  tokenizer.src_lang = src
30
  tokenizer.tgt_lang = tgt
31
 
 
35
  'return_dict_in_generate': True,
36
  'output_scores': True,
37
  'output_hidden_states': True,
38
+ 'length_penalty': 0.0,
39
  'num_return_sequences': candidates,
40
  'num_beams': candidates,
41
  'forced_bos_token_id': tokenizer.lang_code_to_id[tgt]
 
47
  return '\n'.join(output)
48
 
49
  with gr.Blocks() as app:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  with gr.Row():
51
+ gr.Markdown("Your markdown content here")
52
  with gr.Column():
53
  input_text = gr.components.Textbox(lines=7, label="Input Text", value="")
54
  upload_file = gr.File(label="Upload File")
55
  return_seqs = gr.Slider(label="Number of return sequences", value=1, minimum=1, maximum=12, step=1)
56
  outputs = gr.Textbox(lines=7, label="Output Text")
 
57
  translate_btn = gr.Button("Traduis!")
58
  translate_btn.click(translate, inputs=[input_text, upload_file, return_seqs], outputs=outputs)
59
+
60
  app.launch(share=True)