azaninello committed on
Commit
9abc3fa
·
1 Parent(s): 0ab6f3c

Delete get_vars.py

Browse files
Files changed (1) hide show
  1. get_vars.py +0 -23
get_vars.py DELETED
@@ -1,23 +0,0 @@
1
- file = "text.txt"
2
-
3
- import nltk
4
- import simplemma
5
- from nltk.tokenize import word_tokenize
6
- from nltk.tokenize import sent_tokenize
7
- from nltk.probability import FreqDist
8
- from simplemma import text_lemmatizer
9
- nltk.download('punkt')
10
-
11
- def get_lists(file):
12
- with open(file, 'r', encoding='utf-8') as f:
13
- text = f.read()
14
-
15
- word_tokenized_text = word_tokenize(text, language='italian')
16
- word_tokenized_text_lower = [word.lower() for word in word_tokenized_text]
17
-
18
- sent_tokenized_text = sent_tokenize(text, language='italian')
19
- sent_tokenized_text_lower = [sent.lower() for sent in sent_tokenized_text]
20
-
21
- return word_tokenized_text, word_tokenized_text_lower, sent_tokenized_text, sent_tokenized_text_lower
22
-
23
- words, words_lower, sentences, sentences_lower = get_lists(file)