Initial

Browse files

Files changed (14) hide show

.idea/.gitignore +3 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/language-translator-ml-codes.iml +8 -0
.idea/misc.xml +4 -0
.idea/modules.xml +8 -0
LangTransGui.py +266 -0
demo.py +45 -0
eng-french.txt +0 -0
langTraining.py +125 -0
s2s/keras_metadata.pb +3 -0
s2s/saved_model.pb +3 -0
s2s/variables/variables.data-00000-of-00001 +0 -0
s2s/variables/variables.index +0 -0
training_data.pkl +0 -0

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+# Default ignored files
+/shelf/
+/workspace.xml

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/language-translator-ml-codes.iml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,4 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/language-translator-ml-codes.iml" filepath="$PROJECT_DIR$/.idea/language-translator-ml-codes.iml" />
+    </modules>
+  </component>
+</project>

LangTransGui.py ADDED Viewed

	@@ -0,0 +1,266 @@

+import tkinter
+from tkinter import messagebox
+from tkinter import *
+import pickle
+import numpy as np
+from sklearn.feature_extraction.text import CountVectorizer
+from tensorflow.keras.models import Model
+from tensorflow.keras import models
+from tensorflow.keras.layers import Input, LSTM, Dense
+import speech_recognition as sr
+import pyttsx3
+BG_GRAY = "#ABB2B9"
+BG_COLOR = "#000"
+TEXT_COLOR = "#000"
+FONT = "Melvetica 14"
+FONT_BOLD = "Melvetica 13 bold"
+cv = CountVectorizer(binary=True, tokenizer=lambda txt: txt.split(), stop_words=None, analyzer='char')
+class LangTRans:
+    def __init__(self):
+        # initialize tkinter window and load the file
+        self.window = Tk()
+        self.demo_window()
+        self.datafile()
+    def datafile(self):
+        # get all datas from datafile and load the model.
+        datafile = pickle.load(open("training_data.pkl", "rb"))
+        self.input_characters = datafile['input_characters']
+        self.target_characters = datafile['target_characters']
+        self.max_input_length = datafile['max_input_length']
+        self.max_target_length = datafile['max_target_length']
+        self.num_en_chars = datafile['num_en_chars']
+        self.num_dec_chars = datafile['num_dec_chars']
+        self.loadmodel()
+    # runwindow
+    def run(self):
+        self.window.mainloop()
+    def run2(self):
+        self.window.mainloop()
+    def demo_window(self):
+        self.window.title("Language Translator")
+        self.window.resizable(width=False, height=False)
+        self.window.configure(width=800, height=300)
+        # head_label.place(relwidth=1)
+        # line = Label(self.window,width=450,bg=BG_COLOR)
+        # line.place(relwidth=1,rely=0.07,relheight=0.012)
+        self.myText = StringVar()
+        head_label = Label(self.window, text="Translate to french Language!", font=FONT_BOLD, pady=10)
+        head_label.grid(row=0, column=3, padx=20, pady=20, columnspan=5)
+        head_label.grid_rowconfigure(1, weight=1)
+        head_label.grid_columnconfigure(1, weight=1)
+        Label(self.window, text="Input Text:").grid(row=1, padx=10, pady=10)
+        self.e1 = Entry(self.window)
+        self.e1.grid(row=2, column=1)
+        send_button2 = Button(self.window, text="Voice", font=FONT_BOLD, width=2, bg="Red",
+                              command=lambda: self.voice_input(None))
+        send_button2.grid(row=3, column=1, sticky=W + E + N + S, padx=20, pady=20)
+        Label(self.window, text="Translated text in French:").grid(row=1, column=4,sticky="W")
+        self.output_box = Label(self.window, width=20, text="", textvariable=self.myText)
+        self.output_box.grid(row=2, column=5)
+        self.e1 = Entry(self.window)
+        self.e1.grid(row=1, column=1)
+        send_button = Button(self.window, text="Translate", font=FONT_BOLD, width=2, bg="blue", command=lambda: self.on_enter(None))
+        send_button.grid(row=3, column=3, sticky=W + E + N + S, padx=20, pady=20)
+        send_button1 = Button(self.window, text="Voice Output", font=FONT_BOLD, bg="Red",
+                              command=lambda: self.on_enter_voice(None))
+        send_button1.grid(row=3, column=4, sticky=W + E + N + S, padx=20, pady=20)
+    def main_window(self):
+        # add title to window and configure it
+        self.window.title("Language Translator")
+        self.window.resizable(width=False, height=False)
+        self.window.configure(width=520, height=520, bg=BG_COLOR)
+        head_label = Label(self.window, bg=BG_COLOR, fg=TEXT_COLOR, text="Translate to french Language!",
+                           font=FONT_BOLD, pady=10)
+        head_label.place(relwidth=1)
+        line = Label(self.window, width=450, bg=BG_COLOR)
+        line.place(relwidth=1, rely=0.07, relheight=0.012)
+        # create text widget where input and output will be displayed
+        self.text_widget = Text(self.window, width=20, height=2, bg="#fff", fg="#000", font=FONT, padx=5, pady=5)
+        self.text_widget.place(relheight=0.745, relwidth=1, rely=0.08)
+        self.text_widget.configure(cursor="arrow", state=DISABLED)
+        # create scrollbar
+        scrollbar = Scrollbar(self.text_widget)
+        scrollbar.place(relheight=1, relx=0.974)
+        scrollbar.configure(command=self.text_widget.yview)
+        # create bottom label where text widget will placed
+        bottom_label = Label(self.window, bg=BG_GRAY, height=80)
+        bottom_label.place(relwidth=1, rely=0.825)
+        # this is for user to put english text
+        self.msg_entry = Entry(bottom_label, bg="#2C3E50", fg=TEXT_COLOR, font=FONT)
+        self.msg_entry.place(relwidth=0.65, relheight=0.06, rely=0.008, relx=0.008)
+        self.msg_entry.focus()
+        self.msg_entry.bind("<Return>", self.on_enter)
+        # send button which will call on_enter function to send the text
+        send_button2 = Button(bottom_label, text="Voice\n Input", font=FONT_BOLD, width=2, bg="Red",
+                              command=lambda: self.voice_input(None))
+        send_button2.place(relx=0.66, rely=0.008, relheight=0.06, relwidth=0.1325)
+        # send button which will call on_enter function to send the text
+        send_button = Button(bottom_label, text="Only Text", font=FONT_BOLD, width=8, bg="Red",
+                             command=lambda: self.on_enter(None))
+        send_button.place(relx=0.80, rely=0.008, relheight=0.03, relwidth=0.20)
+        # send button which will call on_enter function to send the text
+        send_button1 = Button(bottom_label, text="Voice", font=FONT_BOLD, width=2, bg="Red",
+                              command=lambda: self.on_enter_voice(None))
+        send_button1.place(relx=0.80, rely=0.04, relheight=0.027, relwidth=0.20)
+    def loadmodel(self):
+        # Inference model
+        # load the model
+        model = models.load_model("s2s")
+        # construct encoder model from the output of second layer
+        # discard the encoder output and store only states.
+        enc_outputs, state_h_enc, state_c_enc = model.layers[2].output  # lstm_1
+        # add input object and state from the layer.
+        self.en_model = Model(model.input[0], [state_h_enc, state_c_enc])
+        # create Input object for hidden and cell state for decoder
+        # shape of layer with hidden or latent dimension
+        dec_state_input_h = Input(shape=(256,), name="input_3")
+        dec_state_input_c = Input(shape=(256,), name="input_4")
+        dec_states_inputs = [dec_state_input_h, dec_state_input_c]
+        # add input from the encoder output and initialize with
+        # states.
+        dec_lstm = model.layers[3]
+        dec_outputs, state_h_dec, state_c_dec = dec_lstm(
+            model.input[1], initial_state=dec_states_inputs
+        )
+        dec_states = [state_h_dec, state_c_dec]
+        dec_dense = model.layers[4]
+        dec_outputs = dec_dense(dec_outputs)
+        # create Model with the input of decoder state input and encoder input
+        # and decoder output with the decoder states.
+        self.dec_model = Model(
+            [model.input[1]] + dec_states_inputs, [dec_outputs] + dec_states
+        )
+    def decode_sequence(self, input_seq):
+        # create dict object to get character from the index.
+        reverse_target_char_index = dict(enumerate(self.target_characters))
+        # get the states from the user input sequence
+        states_value = self.en_model.predict(input_seq)
+        # fit target characters and
+        # initialize every first character to be 1 which is '\t'.
+        # Generate empty target sequence of length 1.
+        co = cv.fit(self.target_characters)
+        target_seq = np.array([co.transform(list("\t")).toarray().tolist()], dtype="float32")
+        # if the iteration reaches the end of text than it will be stop the it
+        stop_condition = False
+        # append every predicted character in decoded sentence
+        decoded_sentence = ""
+        while not stop_condition:
+            # get predicted output and discard hidden and cell state.
+            output_chars, h, c = self.dec_model.predict([target_seq] + states_value)
+            # get the index and from dictionary get character from it.
+            char_index = np.argmax(output_chars[0, -1, :])
+            text_char = reverse_target_char_index[char_index]
+            decoded_sentence += text_char
+            # Exit condition: either hit max length
+            # or find stop character.
+            if text_char == "\n" or len(decoded_sentence) > self.max_target_length:
+                stop_condition = True
+            # update target sequence to the current character index.
+            target_seq = np.zeros((1, 1, self.num_dec_chars))
+            target_seq[0, 0, char_index] = 1.0
+            states_value = [h, c]
+        # return the decoded sentence
+        return decoded_sentence
+    def on_enter(self, event):
+        # get user query and bot response
+        msg = self.e1.get()
+        # self.my_msg(msg, "English")
+        self.deocded_output(msg)
+    def on_enter_voice(self, event):
+        # get user query and bot response
+        msg = self.output_box["text"]
+        # self.my_msg(msg, "English")
+        self.deocded_output_voice(msg)
+    def voice_input(self, event):
+        r = sr.Recognizer()
+        with sr.Microphone() as source:
+            audio = r.listen(source,phrase_time_limit=5)
+            try:
+                msg = r.recognize_google(audio)
+                # print(msg)
+                self.e1.insert(0, msg)
+                # self.msg_entry.insert(0,msg)
+            except:
+                print("Not working")
+                # tkinter.messagebox.showerror(title="Error", message="Sorry could not recognize what you said.")
+    def bagofcharacters(self, input_t):
+        cv = CountVectorizer(binary=True, tokenizer=lambda txt: txt.split(), stop_words=None, analyzer='char')
+        en_in_data = [];
+        pad_en = [1] + [0] * (len(self.input_characters) - 1)
+        cv_inp = cv.fit(self.input_characters)
+        en_in_data.append(cv_inp.transform(list(input_t)).toarray().tolist())
+        if len(input_t) < self.max_input_length:
+            for _ in range(self.max_input_length - len(input_t)):
+                en_in_data[0].append(pad_en)
+        return np.array(en_in_data, dtype="float32")
+    def deocded_output(self, msg):
+        # self.text_widget.configure(state=NORMAL)
+        # en_in_data = self.bagofcharacters(msg.lower() + ".")
+        # self.text_widget.insert(END, str(sender) + " : " + self.decode_sequence(en_in_data)
+        #                         + "\n\n")
+        # self.text_widget.configure(state=DISABLED)
+        # self.text_widget.see(END)
+        en_in_data = self.bagofcharacters(msg.lower())
+        # print(self.decode_sequence(en_in_data))
+        self.myText.set(self.decode_sequence(en_in_data))
+    def deocded_output_voice(self, msg):
+        # self.text_widget.configure(state=NORMAL)
+        # en_in_data = self.bagofcharacters(msg.lower() + ".")
+        # self.text_widget.insert(END, str(sender) + " : " + self.decode_sequence(en_in_data)
+        #                         + "\n\n")
+        engine = pyttsx3.init()
+        engine.setProperty("rate", 135)
+        engine.say(msg)
+        engine.runAndWait()
+        # self.text_widget.configure(state=DISABLED)
+        # self.text_widget.see(END)
+    def my_msg(self, msg, sender):
+        if not msg:
+            return
+        self.msg_entry.delete(0, END)
+        self.text_widget.configure(state=NORMAL)
+        self.text_widget.insert(END, str(sender) + " : " + str(msg) + "\n")
+        self.text_widget.configure(state=DISABLED)
+# run the file
+if __name__ == "__main__":
+    LT = LangTRans()
+    LT.run2()

demo.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# from tkinter import *
+# root = Tk()
+# root.geometry("500x500+0+0")
+# frmMain = Frame(root,bg="blue")
+#
+# startbutton = Button(frmMain, text="Start",height=1,width=4)
+# startbutton.grid()
+#
+# #Configure the row/col of our frame and root window to be resizable and fill all available space
+# frmMain.grid(row=0, column=0, sticky="NESW")
+# frmMain.grid_rowconfigure(0, weight=1)
+# frmMain.grid_columnconfigure(0, weight=1)
+# root.grid_rowconfigure(0, weight=1)
+# root.grid_columnconfigure(0, weight=1)
+#
+# root.mainloop()
+# import speech_recognition as sr
+# r = sr.Recognizer()
+# with sr.Microphone() as source:
+#     print("Speak Anything")
+#     audio = r.listen(source,phrase_time_limit=5)
+#     try:
+#         text = r.recognize_google(audio)
+#         print("You said : {}".format(text))
+#     except:
+#         print("Sorry could not recognize what you said")
+#Import tkinter library
+from tkinter import *
+#Create an instance of Tkinter frame or window
+win= Tk()
+#Set the geometry of tkinter frame
+win.geometry("750x250")
+#Make the window sticky for every case
+win.grid_rowconfigure(0, weight=1)
+win.grid_columnconfigure(0, weight=1)
+#Create a Label
+label=Label(win, text="This is a Centered Text",font=('Aerial 15 bold'))
+label.grid(row=2, column=0)
+label.grid_rowconfigure(1, weight=1)
+label.grid_columnconfigure(1, weight=1)
+win.mainloop()

eng-french.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

langTraining.py ADDED Viewed

	@@ -0,0 +1,125 @@

+#Load all the required modules.
+from tensorflow.keras.models import Model
+from tensorflow.keras import models
+from tensorflow.keras.utils import plot_model
+from tensorflow.keras.layers import Input,LSTM,Dense
+from sklearn.feature_extraction.text import CountVectorizer
+import numpy as np
+import pickle
+#initialize all variables
+input_texts=[]
+target_texts=[]
+input_characters=set()
+target_characters=set()
+#read dataset file
+with open('eng-french.txt','r',encoding='utf-8') as f:
+    rows=f.read().split('\n')
+#read first 10,000 rows from dataset
+for row in rows[:10000]:
+    #split input and target by '\t'=tab
+    input_text,target_text = row.split('\t')
+    #add '\t' at start and '\n' at end of text.
+    target_text='\t' + target_text + '\n'
+    input_texts.append(input_text.lower())
+    target_texts.append(target_text.lower())
+    #split character from text and add in respective sets
+    input_characters.update(list(input_text.lower()))
+    target_characters.update(list(target_text.lower()))
+#sort input and target characters
+input_characters = sorted(list(input_characters))
+target_characters = sorted(list(target_characters))
+#get the total length of input and target characters
+num_en_chars = len(input_characters)
+num_dec_chars = len(target_characters)
+#get the maximum length of input and target text.
+max_input_length = max([len(i) for i in input_texts])
+max_target_length = max([len(i) for i in target_texts])
+def bagofcharacters(input_texts,target_texts):
+  #inintialize encoder , decoder input and target data.
+  en_in_data=[] ; dec_in_data=[] ; dec_tr_data=[]
+  #padding variable with first character as 1 as rest all 0.
+  pad_en=[1]+[0]*(len(input_characters)-1)
+  pad_dec=[0]*(len(target_characters)) ; pad_dec[2]=1
+  #countvectorizer for one hot encoding as we want to tokenize character so
+  #anlyzer is true and None the stopwords action.
+  cv=CountVectorizer(binary=True,tokenizer=lambda txt: txt.split(),stop_words=None,analyzer='char')
+  for i,(input_t,target_t) in enumerate(zip(input_texts,target_texts)):
+    #fit the input characters into the CountVectorizer function
+    cv_inp= cv.fit(input_characters)
+    #transform the input text from the help of CountVectorizer fit.
+    #it character present than put 1 and 0 otherwise.
+    en_in_data.append(cv_inp.transform(list(input_t)).toarray().tolist())
+    cv_tar= cv.fit(target_characters)
+    dec_in_data.append(cv_tar.transform(list(target_t)).toarray().tolist())
+    #decoder target will be one timestep ahead because it will not consider
+    #the first character i.e. '\t'.
+    dec_tr_data.append(cv_tar.transform(list(target_t)[1:]).toarray().tolist())
+    #add padding variable if the length of the input or target text is smaller
+    #than their respective maximum input or target length.
+    if len(input_t) < max_input_length:
+      for _ in range(max_input_length-len(input_t)):
+        en_in_data[i].append(pad_en)
+    if len(target_t) < max_target_length:
+      for _ in range(max_target_length-len(target_t)):
+        dec_in_data[i].append(pad_dec)
+    if (len(target_t)-1) < max_target_length:
+      for _ in range(max_target_length-len(target_t)+1):
+        dec_tr_data[i].append(pad_dec)
+  #convert list to numpy array with data type float32
+  en_in_data=np.array(en_in_data,dtype="float32")
+  dec_in_data=np.array(dec_in_data,dtype="float32")
+  dec_tr_data=np.array(dec_tr_data,dtype="float32")
+  return en_in_data,dec_in_data,dec_tr_data
+#create input object of total number of encoder characters
+en_inputs = Input(shape=(None, num_en_chars))
+#create LSTM with the hidden dimension of 256
+#return state=True as we don't want output sequence.
+encoder = LSTM(256, return_state=True)
+#discard encoder output and store hidden and cell state.
+en_outputs, state_h, state_c = encoder(en_inputs)
+en_states = [state_h, state_c]
+#create input object of total number of decoder characters
+dec_inputs = Input(shape=(None, num_dec_chars))
+#create LSTM with the hidden dimension of 256
+#return state and return sequences as we want output sequence.
+dec_lstm = LSTM(256, return_sequences=True, return_state=True)
+#initialize the decoder model with the states on encoder.
+dec_outputs, _, _ = dec_lstm(dec_inputs, initial_state=en_states)
+#Output layer with shape of total number of decoder characters
+dec_dense = Dense(num_dec_chars, activation="softmax")
+dec_outputs = dec_dense(dec_outputs)
+#create Model and store all variables
+model = Model([en_inputs, dec_inputs], dec_outputs)
+pickle.dump({'input_characters':input_characters,'target_characters':target_characters,
+             'max_input_length':max_input_length,'max_target_length':max_target_length,
+             'num_en_chars':num_en_chars,'num_dec_chars':num_dec_chars},open("training_data.pkl","wb"))
+#load the data and train the model
+# en_in_data,dec_in_data,dec_tr_data = bagofcharacters(input_texts,target_texts)
+# model.compile(
+#     optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
+# )
+# model.fit(
+#     [en_in_data, dec_in_data],
+#     dec_tr_data,
+#     batch_size=64,
+#     epochs=200,
+#     validation_split=0.2,
+# )
+# Save model
+model.save("s2s")cd
+#summary and model plot
+model.summary()
+plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

s2s/keras_metadata.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be4652d088201a57835ce518342d56905ac75859198c0b00d2d0ef37cd6b8ead
+size 14765

s2s/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f3c8c32dad3dec377098cd2a83875af88ecb781adf80a40fd83c34285a375ba
+size 1426649

s2s/variables/variables.data-00000-of-00001 ADDED Viewed

Binary file (2.64 MB). View file

s2s/variables/variables.index ADDED Viewed

Binary file (629 Bytes). View file

training_data.pkl ADDED Viewed

Binary file (537 Bytes). View file