Bajiyo committed on
Commit
d0d4c5d
·
verified ·
1 Parent(s): 72c37b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -11
app.py CHANGED
@@ -1,19 +1,36 @@
1
- from transformers import AutoTokenizer, TFBertForSeq2SeqLM # Assuming TFBert model
2
-
3
- # Load tokenizer configurations
4
- source_tokenizer = AutoTokenizer.from_pretrained("https://huggingface.co/Bajiyo/mal_en_transliteration/tree/main/source_tokenizer_config.json")
5
- target_tokenizer = AutoTokenizer.from_pretrained("https://huggingface.co/Bajiyo/mal_en_transliteration/tree/main/target_tokenizer_config.json")
6
  from tensorflow.keras.models import load_model
 
 
 
7
 
 
8
  model = load_model("https://huggingface.co/Bajiyo/mal_en_transliteration/tree/main/transliteration_model.h5")
9
- # Load the model (replace with your actual model path)
10
- #model = TFBertForSeq2SeqLM.from_pretrained("https://huggingface.co/Bajiyo/mal_en_transliteration/tree/main/transliteration_model.h5")
 
 
 
 
 
 
 
 
 
11
 
12
  def translate(malayalam_text):
13
- """Function to perform Malayalam to English transliteration"""
14
- source_ids = source_tokenizer(malayalam_text, return_tensors="pt")["input_ids"]
15
- translated_tokens = model.generate(**source_ids)
16
- english_text = target_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
 
 
 
 
 
 
 
 
 
17
  return english_text
18
 
19
  interface = gradio.Interface(
 
 
 
 
 
 
1
  from tensorflow.keras.models import load_model
2
+ from tensorflow.keras.preprocessing.text import Tokenizer
3
+ import json
4
+ from gradio import Interface
5
 
6
+ # Load model (replace with your actual path)
7
  model = load_model("https://huggingface.co/Bajiyo/mal_en_transliteration/tree/main/transliteration_model.h5")
8
+
9
+ # Load tokenizers from configuration files (replace with your paths)
10
+ with open("https://huggingface.co/Bajiyo/mal_en_transliteration/tree/main/source_tokenizer_config.json", "r") as f:
11
+ source_tokenizer_config = json.load(f)
12
+ source_tokenizer = Tokenizer(num_words=source_tokenizer_config["num_words"])
13
+ source_tokenizer.fit_on_texts(source_tokenizer_config["texts"]) # Assuming pre-defined texts
14
+
15
+ with open("https://huggingface.co/Bajiyo/mal_en_transliteration/tree/main/target_tokenizer_config.json", "r") as f:
16
+ target_tokenizer_config = json.load(f)
17
+ target_tokenizer = Tokenizer(num_words=target_tokenizer_config["num_words"])
18
+ target_tokenizer.fit_on_texts(target_tokenizer_config["texts"]) # Assuming pre-defined texts
19
 
20
  def translate(malayalam_text):
21
+ # Preprocessing (tokenization)
22
+ source_tokens = source_tokenizer.texts_to_sequences([malayalam_text])[0]
23
+
24
+ # Padding (adjust maxlen based on your model's requirements)
25
+ maxlen = 100 # Example value, adjust as needed
26
+ padded_text = pad_sequences([source_tokens], maxlen=maxlen, padding="post")
27
+
28
+ # Make predictions using the model
29
+ predictions = model.predict(padded_text)
30
+
31
+ # Postprocessing (decoding)
32
+ english_text = target_tokenizer.sequences_to_texts([predictions[0]])[0]
33
+
34
  return english_text
35
 
36
  interface = gradio.Interface(