saim074 committed
Commit afe86b6 (0 parents)

Duplicate from saim074/perplexity

Files changed (4)
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +69 -0
  4. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Perplexity
+ emoji: ⚡
+ colorFrom: blue
+ colorTo: gray
+ sdk: streamlit
+ sdk_version: 1.15.2
+ app_file: app.py
+ pinned: false
+ duplicated_from: saim074/perplexity
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,69 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ import nltk
+ nltk.download('punkt')
+ from nltk.tokenize import sent_tokenize
+ import streamlit as st
+
+ def load_model(model_id):
+     tokenizer = AutoTokenizer.from_pretrained(model_id)
+     model = AutoModelForCausalLM.from_pretrained(model_id)
+     return tokenizer, model
+
+ model_id = "asi/gpt-fr-cased-small"
+ tokenizer_fr, model_fr = load_model(model_id)
+
+ model_id = "gpt2"
+ tokenizer_en, model_en = load_model(model_id)
+
+ model_id = "dbmdz/german-gpt2"
+ tokenizer_de, model_de = load_model(model_id)
+
+ with st.form(key='Form'):
+     text = st.text_area("Enter text here.")
+     option = st.selectbox('Select Language', ('English', 'German', 'French'))
+     submitted = st.form_submit_button("Submit")
+
+ if submitted:
+     text = text.replace('\n', '')
+
+     with torch.no_grad():
+         if option == 'German':
+             encodings = tokenizer_de(text, return_tensors="pt")
+             input_ids = encodings.input_ids
+             target_ids = input_ids.clone()
+             loss = model_de(input_ids, labels=target_ids).loss
+         elif option == 'English':
+             encodings = tokenizer_en(text, return_tensors="pt")
+             input_ids = encodings.input_ids
+             target_ids = input_ids.clone()
+             loss = model_en(input_ids, labels=target_ids).loss
+         else:
+             encodings = tokenizer_fr(text, return_tensors="pt")
+             input_ids = encodings.input_ids
+             target_ids = input_ids.clone()
+             loss = model_fr(input_ids, labels=target_ids).loss
+
+     st.write("Entire Text")
+     st.write("Perplexity: ", round(float(torch.exp(loss)), 2))
+
+     for sentence in sent_tokenize(text):
+         st.write("________________________")
+         st.write(sentence)
+         with torch.no_grad():
+             if option == 'German':
+                 encodings = tokenizer_de(sentence, return_tensors="pt")
+                 input_ids = encodings.input_ids
+                 target_ids = input_ids.clone()
+                 loss = model_de(input_ids, labels=target_ids).loss
+             elif option == 'English':
+                 encodings = tokenizer_en(sentence, return_tensors="pt")
+                 input_ids = encodings.input_ids
+                 target_ids = input_ids.clone()
+                 loss = model_en(input_ids, labels=target_ids).loss
+             else:
+                 encodings = tokenizer_fr(sentence, return_tensors="pt")
+                 input_ids = encodings.input_ids
+                 target_ids = input_ids.clone()
+                 loss = model_fr(input_ids, labels=target_ids).loss
+         st.write("Perplexity: ", round(float(torch.exp(loss)), 2))
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ torch
+ transformers
+ nltk