DebasishDhal99 committed on
Commit d043c48
1 Parent(s): 0a1267d

Updating the tokenizer method from NLTK to plain whitespace splitting

Files changed (1)
  1. turkish.py +2 -1
turkish.py CHANGED
@@ -58,7 +58,8 @@ def turkish_word_to_latin(word):
 
 
 def turkish_sentence_to_latin(sentence):
-    word_list = word_tokenize(sentence)
+    # word_list = word_tokenize(sentence)  # NLTK's word_tokenize didn't work out: it sometimes also splits on "'", so İstanbul'u becomes İstanbul ' u
+    word_list = sentence.split(" ")
     processed_word_list = []
 
     for word in word_list:
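
For reference, a minimal sketch of the behavior the commit describes, not part of the repository itself. It assumes nltk is installed along with its punkt tokenizer data; the exact token split may vary by NLTK version.

```python
from nltk.tokenize import word_tokenize  # requires: pip install nltk; nltk.download("punkt")

sentence = "İstanbul'u seviyorum"

# NLTK's English-oriented tokenizer splits the Turkish apostrophe suffix
# apart; per the commit message, İstanbul'u becomes İstanbul ' u
# (the exact tokens are version-dependent).
print(word_tokenize(sentence))

# Plain whitespace splitting, as adopted in this commit, keeps the word
# and its apostrophe suffix together: ["İstanbul'u", 'seviyorum']
print(sentence.split(" "))
```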